All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/3] virtio: Add a new layer to abstract pci access method
@ 2016-01-18  9:13 Tetsuya Mukawa
  2016-01-18  9:13 ` [PATCH 1/3] virtio: Change the parameter order of io_write8/16/32() Tetsuya Mukawa
                   ` (3 more replies)
  0 siblings, 4 replies; 120+ messages in thread
From: Tetsuya Mukawa @ 2016-01-18  9:13 UTC (permalink / raw)
  To: dev

The patches abstract pci access method of virtio-net PMD.
The patch should be on Yuanhan's below patch series.
 - [PATCH v4 0/8] virtio 1.0 enabling for virtio pmd driver


Tetsuya Mukawa (3):
  virtio: Change the parameter order of io_write8/16/32()
  virtio: move rte_eal_pci_unmap_device() to virtio_pci.c
  virtio: Add a new layer to abstract pci access method

 drivers/net/virtio/virtio_ethdev.c |   4 +-
 drivers/net/virtio/virtio_pci.c    | 468 ++++++++++++++++++++++++++-----------
 drivers/net/virtio/virtio_pci.h    |  33 ++-
 3 files changed, 369 insertions(+), 136 deletions(-)

-- 
2.1.4

^ permalink raw reply	[flat|nested] 120+ messages in thread

* [PATCH 1/3] virtio: Change the parameter order of io_write8/16/32()
  2016-01-18  9:13 [PATCH 0/3] virtio: Add a new layer to abstract pci access method Tetsuya Mukawa
@ 2016-01-18  9:13 ` Tetsuya Mukawa
  2016-01-21 11:07   ` [RFC PATCH 0/5] virtio: Add a new layer to abstract pci access method Tetsuya Mukawa
                     ` (9 more replies)
  2016-01-18  9:13 ` [PATCH 2/3] virtio: move rte_eal_pci_unmap_device() to virtio_pci.c Tetsuya Mukawa
                   ` (2 subsequent siblings)
  3 siblings, 10 replies; 120+ messages in thread
From: Tetsuya Mukawa @ 2016-01-18  9:13 UTC (permalink / raw)
  To: dev

The patch change the parameter order of below functions.
 - io_write8()
 - io_write16()
 - io_write32()
This changig are needed to add a new layer to abstract accessing
method.

Signed-off-by: Tetsuya Mukawa <mukawa@igel.co.jp>
---
 drivers/net/virtio/virtio_pci.c | 66 ++++++++++++++++++++---------------------
 1 file changed, 33 insertions(+), 33 deletions(-)

diff --git a/drivers/net/virtio/virtio_pci.c b/drivers/net/virtio/virtio_pci.c
index a9f179f..f1a6ee9 100644
--- a/drivers/net/virtio/virtio_pci.c
+++ b/drivers/net/virtio/virtio_pci.c
@@ -478,7 +478,7 @@ io_read##nr_bits(type *addr)			\
 
 #define MODERN_WRITE_DEF(nr_bits, type)		\
 static inline void				\
-io_write##nr_bits(type val, type *addr)		\
+io_write##nr_bits(type *addr, type val)		\
 {						\
 	*(volatile type *)addr = val;		\
 }
@@ -493,10 +493,10 @@ MODERN_READ_DEF (32, uint32_t)
 MODERN_WRITE_DEF(32, uint32_t)
 
 static inline void
-io_write64_twopart(uint64_t val, uint32_t *lo, uint32_t *hi)
+io_write64_twopart(uint32_t *lo, uint32_t *hi, uint64_t val)
 {
-	io_write32((uint32_t)val, lo);
-	io_write32(val >> 32,     hi);
+	io_write32(lo, (uint32_t)val);
+	io_write32(hi, val >> 32);
 }
 
 static void
@@ -526,7 +526,7 @@ modern_write_dev_config(struct virtio_hw *hw, uint64_t offset,
 	const uint8_t *p = src;
 
 	for (i = 0;  i < length; i++)
-		io_write8(*p++, (uint8_t *)hw->dev_cfg + offset + i);
+		io_write8((uint8_t *)hw->dev_cfg + offset + i, *p++);
 }
 
 static uint64_t
@@ -534,10 +534,10 @@ modern_get_features(struct virtio_hw *hw)
 {
 	uint32_t features_lo, features_hi;
 
-	io_write32(0, &hw->common_cfg->device_feature_select);
+	io_write32(&hw->common_cfg->device_feature_select, 0);
 	features_lo = io_read32(&hw->common_cfg->device_feature);
 
-	io_write32(1, &hw->common_cfg->device_feature_select);
+	io_write32(&hw->common_cfg->device_feature_select, 1);
 	features_hi = io_read32(&hw->common_cfg->device_feature);
 
 	return ((uint64_t)(features_hi) << 32) | features_lo;
@@ -546,13 +546,13 @@ modern_get_features(struct virtio_hw *hw)
 static void
 modern_set_features(struct virtio_hw *hw, uint64_t features)
 {
-	io_write32(0, &hw->common_cfg->guest_feature_select);
-	io_write32(features & ((1ULL << 32) - 1),
-		&hw->common_cfg->guest_feature);
+	io_write32(&hw->common_cfg->guest_feature_select, 0);
+	io_write32(&hw->common_cfg->guest_feature,
+		   features & ((1ULL << 32) - 1));
 
-	io_write32(1, &hw->common_cfg->guest_feature_select);
-	io_write32(features >> 32,
-		&hw->common_cfg->guest_feature);
+	io_write32(&hw->common_cfg->guest_feature_select, 1);
+	io_write32(&hw->common_cfg->guest_feature,
+		   features >> 32);
 }
 
 static uint8_t
@@ -564,7 +564,7 @@ modern_get_status(struct virtio_hw *hw)
 static void
 modern_set_status(struct virtio_hw *hw, uint8_t status)
 {
-	io_write8(status, &hw->common_cfg->device_status);
+	io_write8(&hw->common_cfg->device_status, status);
 }
 
 static void
@@ -583,14 +583,14 @@ modern_get_isr(struct virtio_hw *hw)
 static uint16_t
 modern_set_config_irq(struct virtio_hw *hw, uint16_t vec)
 {
-	io_write16(vec, &hw->common_cfg->msix_config);
+	io_write16(&hw->common_cfg->msix_config, vec);
 	return io_read16(&hw->common_cfg->msix_config);
 }
 
 static uint16_t
 modern_get_queue_num(struct virtio_hw *hw, uint16_t queue_id)
 {
-	io_write16(queue_id, &hw->common_cfg->queue_select);
+	io_write16(&hw->common_cfg->queue_select, queue_id);
 	return io_read16(&hw->common_cfg->queue_size);
 }
 
@@ -606,20 +606,20 @@ modern_setup_queue(struct virtio_hw *hw, struct virtqueue *vq)
 							 ring[vq->vq_nentries]),
 				   VIRTIO_PCI_VRING_ALIGN);
 
-	io_write16(vq->vq_queue_index, &hw->common_cfg->queue_select);
+	io_write16(&hw->common_cfg->queue_select, vq->vq_queue_index);
 
-	io_write64_twopart(desc_addr, &hw->common_cfg->queue_desc_lo,
-				      &hw->common_cfg->queue_desc_hi);
-	io_write64_twopart(avail_addr, &hw->common_cfg->queue_avail_lo,
-				       &hw->common_cfg->queue_avail_hi);
-	io_write64_twopart(used_addr, &hw->common_cfg->queue_used_lo,
-				      &hw->common_cfg->queue_used_hi);
+	io_write64_twopart(&hw->common_cfg->queue_desc_lo,
+			   &hw->common_cfg->queue_desc_hi, desc_addr);
+	io_write64_twopart(&hw->common_cfg->queue_avail_lo,
+			   &hw->common_cfg->queue_avail_hi, avail_addr);
+	io_write64_twopart(&hw->common_cfg->queue_used_lo,
+			   &hw->common_cfg->queue_used_hi, used_addr);
 
 	notify_off = io_read16(&hw->common_cfg->queue_notify_off);
 	vq->notify_addr = (void *)((uint8_t *)hw->notify_base +
 				notify_off * hw->notify_off_multiplier);
 
-	io_write16(1, &hw->common_cfg->queue_enable);
+	io_write16(&hw->common_cfg->queue_enable, 1);
 
 	PMD_INIT_LOG(DEBUG, "queue %u addresses:", vq->vq_queue_index);
 	PMD_INIT_LOG(DEBUG, "\t desc_addr: %"PRIx64, desc_addr);
@@ -632,22 +632,22 @@ modern_setup_queue(struct virtio_hw *hw, struct virtqueue *vq)
 static void
 modern_del_queue(struct virtio_hw *hw, struct virtqueue *vq)
 {
-	io_write16(vq->vq_queue_index, &hw->common_cfg->queue_select);
+	io_write16(&hw->common_cfg->queue_select, vq->vq_queue_index);
 
-	io_write64_twopart(0, &hw->common_cfg->queue_desc_lo,
-				  &hw->common_cfg->queue_desc_hi);
-	io_write64_twopart(0, &hw->common_cfg->queue_avail_lo,
-				  &hw->common_cfg->queue_avail_hi);
-	io_write64_twopart(0, &hw->common_cfg->queue_used_lo,
-				  &hw->common_cfg->queue_used_hi);
+	io_write64_twopart(&hw->common_cfg->queue_desc_lo,
+			   &hw->common_cfg->queue_desc_hi, 0);
+	io_write64_twopart(&hw->common_cfg->queue_avail_lo,
+			   &hw->common_cfg->queue_avail_hi, 0);
+	io_write64_twopart(&hw->common_cfg->queue_used_lo,
+			   &hw->common_cfg->queue_used_hi, 0);
 
-	io_write16(0, &hw->common_cfg->queue_enable);
+	io_write16(&hw->common_cfg->queue_enable, 0);
 }
 
 static void
 modern_notify_queue(struct virtio_hw *hw __rte_unused, struct virtqueue *vq)
 {
-	io_write16(1, vq->notify_addr);
+	io_write16(vq->notify_addr, 1);
 }
 
 static const struct virtio_pci_ops modern_ops = {
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 120+ messages in thread

* [PATCH 2/3] virtio: move rte_eal_pci_unmap_device() to virtio_pci.c
  2016-01-18  9:13 [PATCH 0/3] virtio: Add a new layer to abstract pci access method Tetsuya Mukawa
  2016-01-18  9:13 ` [PATCH 1/3] virtio: Change the parameter order of io_write8/16/32() Tetsuya Mukawa
@ 2016-01-18  9:13 ` Tetsuya Mukawa
  2016-01-18  9:13 ` [PATCH 3/3] virtio: Add a new layer to abstract pci access method Tetsuya Mukawa
  2016-01-18 13:13 ` [PATCH 0/3] " Tan, Jianfeng
  3 siblings, 0 replies; 120+ messages in thread
From: Tetsuya Mukawa @ 2016-01-18  9:13 UTC (permalink / raw)
  To: dev

To abstract pci access method, the patch moves below function
to "virtio_pci.c".
 - rte_eal_pci_unmap_device()

Signed-off-by: Tetsuya Mukawa <mukawa@igel.co.jp>
---
 drivers/net/virtio/virtio_ethdev.c |  2 +-
 drivers/net/virtio/virtio_pci.c    | 11 +++++++++++
 drivers/net/virtio/virtio_pci.h    |  1 +
 3 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c
index deb0382..b98d195 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -1177,7 +1177,7 @@ eth_virtio_dev_uninit(struct rte_eth_dev *eth_dev)
 		rte_intr_callback_unregister(&pci_dev->intr_handle,
 						virtio_interrupt_handler,
 						eth_dev);
-	rte_eal_pci_unmap_device(pci_dev);
+	vtpci_uninit(dev, hw)
 
 	PMD_INIT_LOG(DEBUG, "dev_uninit completed");
 
diff --git a/drivers/net/virtio/virtio_pci.c b/drivers/net/virtio/virtio_pci.c
index f1a6ee9..ffcd2fa 100644
--- a/drivers/net/virtio/virtio_pci.c
+++ b/drivers/net/virtio/virtio_pci.c
@@ -872,3 +872,14 @@ vtpci_init(struct rte_pci_device *dev, struct virtio_hw *hw)
 
 	return 0;
 }
+
+void
+vtpci_uninit(struct rte_pci_device *dev, struct virtio_hw *hw)
+{
+	hw->dev  = NULL;
+	hw->vtpci_ops = NULL;
+	hw->use_msix = 0;
+	hw->io_base  = 0;
+	hw->modern   = 0;
+	rte_eal_pci_unmap_device(dev);
+}
diff --git a/drivers/net/virtio/virtio_pci.h b/drivers/net/virtio/virtio_pci.h
index 99572a0..ec1d46b 100644
--- a/drivers/net/virtio/virtio_pci.h
+++ b/drivers/net/virtio/virtio_pci.h
@@ -328,6 +328,7 @@ vtpci_with_feature(struct virtio_hw *hw, uint64_t bit)
  * Function declaration from virtio_pci.c
  */
 int vtpci_init(struct rte_pci_device *, struct virtio_hw *);
+void vtpci_uninit(struct rte_pci_device *dev, struct virtio_hw *);
 void vtpci_reset(struct virtio_hw *);
 
 void vtpci_reinit_complete(struct virtio_hw *);
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 120+ messages in thread

* [PATCH 3/3] virtio: Add a new layer to abstract pci access method
  2016-01-18  9:13 [PATCH 0/3] virtio: Add a new layer to abstract pci access method Tetsuya Mukawa
  2016-01-18  9:13 ` [PATCH 1/3] virtio: Change the parameter order of io_write8/16/32() Tetsuya Mukawa
  2016-01-18  9:13 ` [PATCH 2/3] virtio: move rte_eal_pci_unmap_device() to virtio_pci.c Tetsuya Mukawa
@ 2016-01-18  9:13 ` Tetsuya Mukawa
  2016-01-18 13:46   ` Yuanhan Liu
  2016-01-18 13:13 ` [PATCH 0/3] " Tan, Jianfeng
  3 siblings, 1 reply; 120+ messages in thread
From: Tetsuya Mukawa @ 2016-01-18  9:13 UTC (permalink / raw)
  To: dev

This patch adds below function pointers to abstract pci access method.
 - legacy_read8/16/32
 - legacy_write8/16/32
 - modern_read8/16/32
 - modern_write8/16/32
 - map_pci_cfg
 - unmap_pci_cfg
 - get_cfg_addr
 - read_pci_cfg

This layer will be used when virtio-net PMD supports container extension.
The legacy_xxxxx are for handling legacy virtio-net device, and
the modern_xxxxx are for handling virtio-1.0 device.
This new layer also abstract how to access to pci configuration space.

Signed-off-by: Tetsuya Mukawa <mukawa@igel.co.jp>
---
 drivers/net/virtio/virtio_ethdev.c |   4 +-
 drivers/net/virtio/virtio_pci.c    | 449 ++++++++++++++++++++++++++-----------
 drivers/net/virtio/virtio_pci.h    |  34 ++-
 3 files changed, 354 insertions(+), 133 deletions(-)

diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c
index b98d195..c477b05 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -1037,7 +1037,7 @@ eth_virtio_dev_init(struct rte_eth_dev *eth_dev)
 
 	pci_dev = eth_dev->pci_dev;
 
-	if (vtpci_init(pci_dev, hw) < 0)
+	if (vtpci_init(eth_dev, hw) < 0)
 		return -1;
 
 	/* Reset the device although not necessary at startup */
@@ -1177,7 +1177,7 @@ eth_virtio_dev_uninit(struct rte_eth_dev *eth_dev)
 		rte_intr_callback_unregister(&pci_dev->intr_handle,
 						virtio_interrupt_handler,
 						eth_dev);
-	vtpci_uninit(dev, hw)
+	vtpci_uninit(eth_dev, hw);
 
 	PMD_INIT_LOG(DEBUG, "dev_uninit completed");
 
diff --git a/drivers/net/virtio/virtio_pci.c b/drivers/net/virtio/virtio_pci.c
index ffcd2fa..20b64eb 100644
--- a/drivers/net/virtio/virtio_pci.c
+++ b/drivers/net/virtio/virtio_pci.c
@@ -68,6 +68,190 @@
 #define VIRTIO_WRITE_REG_4(hw, reg, value) \
 	outl_p((unsigned int)(value), (VIRTIO_PCI_REG_ADDR((hw), (reg))))
 
+static uint8_t
+phys_legacy_read8(struct virtio_hw *hw, uint8_t *addr)
+{
+	return VIRTIO_READ_REG_1(hw, (uint64_t)addr);
+}
+
+static uint16_t
+phys_legacy_read16(struct virtio_hw *hw, uint16_t *addr)
+{
+	return VIRTIO_READ_REG_2(hw, (uint64_t)addr);
+}
+
+static uint32_t
+phys_legacy_read32(struct virtio_hw *hw, uint32_t *addr)
+{
+	return VIRTIO_READ_REG_4(hw, (uint64_t)addr);
+}
+
+static void
+phys_legacy_write8(struct virtio_hw *hw, uint8_t *addr, uint8_t val)
+{
+	return VIRTIO_WRITE_REG_1(hw, (uint64_t)addr, val);
+}
+
+static void
+phys_legacy_write16(struct virtio_hw *hw, uint16_t *addr, uint16_t val)
+{
+	return VIRTIO_WRITE_REG_2(hw, (uint64_t)addr, val);
+}
+
+static void
+phys_legacy_write32(struct virtio_hw *hw, uint32_t *addr, uint32_t val)
+{
+	return VIRTIO_WRITE_REG_4(hw, (uint64_t)addr, val);
+}
+
+#define MODERN_READ_DEF(nr_bits, type) 		\
+static inline type				\
+io_read##nr_bits(type *addr)			\
+{						\
+	return *(volatile type *)addr;		\
+}
+
+#define MODERN_WRITE_DEF(nr_bits, type) 	\
+static inline void				\
+io_write##nr_bits(type *addr, type val)		\
+{						\
+	*(volatile type *)addr = val;		\
+}
+
+MODERN_READ_DEF (8, uint8_t)
+MODERN_WRITE_DEF(8, uint8_t)
+
+MODERN_READ_DEF (16, uint16_t)
+MODERN_WRITE_DEF(16, uint16_t)
+
+MODERN_READ_DEF (32, uint32_t)
+MODERN_WRITE_DEF(32, uint32_t)
+
+static uint8_t
+phys_modern_read8(struct virtio_hw *hw __rte_unused, uint8_t *addr)
+{
+	return io_read8((uint8_t *)addr);
+}
+
+static uint16_t
+phys_modern_read16(struct virtio_hw *hw __rte_unused, uint16_t *addr)
+{
+	return io_read16((uint16_t *)addr);
+}
+
+static uint32_t
+phys_modern_read32(struct virtio_hw *hw __rte_unused, uint32_t *addr)
+{
+	return io_read32((uint32_t *)addr);
+}
+
+static void
+phys_modern_write8(struct virtio_hw *hw __rte_unused,
+		uint8_t *addr, uint8_t val)
+{
+	return io_write8((uint8_t *)addr, val);
+}
+
+static void
+phys_modern_write16(struct virtio_hw *hw __rte_unused,
+		uint16_t *addr, uint16_t val)
+{
+	return io_write16((uint16_t *)addr, val);
+}
+
+static void
+phys_modern_write32(struct virtio_hw *hw __rte_unused,
+		uint32_t *addr, uint32_t val)
+{
+	return io_write32((uint32_t *)addr, val);
+}
+
+static int
+phys_map_pci_cfg(struct virtio_hw *hw)
+{
+	return rte_eal_pci_map_device(hw->dev);
+}
+
+static void
+phys_unmap_pci_cfg(struct virtio_hw *hw)
+{
+	rte_eal_pci_unmap_device(hw->dev);
+}
+
+static int
+phys_read_pci_cfg(struct virtio_hw *hw, void *buf, size_t len, off_t offset)
+{
+	return rte_eal_pci_read_config(hw->dev, buf, len, offset);
+}
+
+static void *
+phys_get_cfg_addr(struct virtio_hw *hw, struct virtio_pci_cap *cap)
+{
+	uint8_t  bar    = cap->bar;
+	uint32_t length = cap->length;
+	uint32_t offset = cap->offset;
+	uint8_t *base;
+
+	if (bar > 5) {
+		PMD_INIT_LOG(ERR, "invalid bar: %u", bar);
+		return NULL;
+	}
+
+	if (offset + length > hw->dev->mem_resource[bar].len) {
+		PMD_INIT_LOG(ERR,
+			"invalid cap: overflows bar space: %u > %"PRIu64,
+			offset + length, hw->dev->mem_resource[bar].len);
+		return NULL;
+	}
+
+	base = hw->dev->mem_resource[bar].addr;
+	if (base == NULL) {
+		PMD_INIT_LOG(ERR, "bar %u base addr is NULL", bar);
+		return NULL;
+	}
+
+	return base + offset;
+}
+
+static const struct virtio_pci_access_ops phys_access_ops = {
+	.legacy_read8		= phys_legacy_read8,
+	.legacy_read16		= phys_legacy_read16,
+	.legacy_read32		= phys_legacy_read32,
+	.legacy_write8		= phys_legacy_write8,
+	.legacy_write16		= phys_legacy_write16,
+	.legacy_write32		= phys_legacy_write32,
+
+	.modern_read8		= phys_modern_read8,
+	.modern_read16		= phys_modern_read16,
+	.modern_read32		= phys_modern_read32,
+	.modern_write8		= phys_modern_write8,
+	.modern_write16		= phys_modern_write16,
+	.modern_write32		= phys_modern_write32,
+
+	.map_pci_cfg		= phys_map_pci_cfg,
+	.unmap_pci_cfg		= phys_unmap_pci_cfg,
+	.read_pci_cfg		= phys_read_pci_cfg,
+	.get_cfg_addr		= phys_get_cfg_addr,
+};
+
+static int
+vtpci_access_init(struct rte_eth_dev *dev, struct virtio_hw *hw)
+{
+	if (dev->dev_type == RTE_ETH_DEV_PCI) {
+		hw->vtpci_access_ops = &phys_access_ops;
+		return 0;
+	}
+
+	PMD_DRV_LOG(ERR, "Unkown virtio-net device.");
+	return -1;
+}
+
+static void
+vtpci_access_uninit(struct rte_eth_dev *dev __rte_unused, struct virtio_hw *hw)
+{
+	hw->vtpci_access_ops = NULL;
+}
+
 static void
 legacy_read_dev_config(struct virtio_hw *hw, uint64_t offset,
 		       void *dst, int length)
@@ -80,13 +264,16 @@ legacy_read_dev_config(struct virtio_hw *hw, uint64_t offset,
 	for (d = dst; length > 0; d += size, off += size, length -= size) {
 		if (length >= 4) {
 			size = 4;
-			*(uint32_t *)d = VIRTIO_READ_REG_4(hw, off);
+			*(uint32_t *)d = hw->vtpci_access_ops->legacy_read32(
+						hw, (uint32_t *)off);
 		} else if (length >= 2) {
 			size = 2;
-			*(uint16_t *)d = VIRTIO_READ_REG_2(hw, off);
+			*(uint16_t *)d = hw->vtpci_access_ops->legacy_read16(
+						hw, (uint16_t *)off);
 		} else {
 			size = 1;
-			*d = VIRTIO_READ_REG_1(hw, off);
+			*d = hw->vtpci_access_ops->legacy_read8(
+						hw, (uint8_t *)off);
 		}
 	}
 }
@@ -103,13 +290,16 @@ legacy_write_dev_config(struct virtio_hw *hw, uint64_t offset,
 	for (s = src; length > 0; s += size, off += size, length -= size) {
 		if (length >= 4) {
 			size = 4;
-			VIRTIO_WRITE_REG_4(hw, off, *(const uint32_t *)s);
+			hw->vtpci_access_ops->legacy_write32(hw,
+					(uint32_t *)off, *(const uint32_t *)s);
 		} else if (length >= 2) {
 			size = 2;
-			VIRTIO_WRITE_REG_2(hw, off, *(const uint16_t *)s);
+			hw->vtpci_access_ops->legacy_write16(hw,
+					(uint16_t *)off, *(const uint16_t *)s);
 		} else {
 			size = 1;
-			VIRTIO_WRITE_REG_1(hw, off, *s);
+			hw->vtpci_access_ops->legacy_write8(hw,
+					(uint8_t *)off, *s);
 		}
 	}
 }
@@ -117,7 +307,8 @@ legacy_write_dev_config(struct virtio_hw *hw, uint64_t offset,
 static uint64_t
 legacy_get_features(struct virtio_hw *hw)
 {
-	return VIRTIO_READ_REG_4(hw, VIRTIO_PCI_HOST_FEATURES);
+	return hw->vtpci_access_ops->legacy_read32(hw,
+			(uint32_t *)VIRTIO_PCI_HOST_FEATURES);
 }
 
 static void
@@ -128,19 +319,22 @@ legacy_set_features(struct virtio_hw *hw, uint64_t features)
 			"only 32 bit features are allowed for legacy virtio!");
 		return;
 	}
-	VIRTIO_WRITE_REG_4(hw, VIRTIO_PCI_GUEST_FEATURES, features);
+	hw->vtpci_access_ops->legacy_write32(hw,
+			(uint32_t *)VIRTIO_PCI_GUEST_FEATURES, features);
 }
 
 static uint8_t
 legacy_get_status(struct virtio_hw *hw)
 {
-	return VIRTIO_READ_REG_1(hw, VIRTIO_PCI_STATUS);
+	return hw->vtpci_access_ops->legacy_read8(hw,
+			(uint8_t *)VIRTIO_PCI_STATUS);
 }
 
 static void
 legacy_set_status(struct virtio_hw *hw, uint8_t status)
 {
-	VIRTIO_WRITE_REG_1(hw, VIRTIO_PCI_STATUS, status);
+	hw->vtpci_access_ops->legacy_write8(hw,
+			(uint8_t *)VIRTIO_PCI_STATUS, status);
 }
 
 static void
@@ -152,45 +346,56 @@ legacy_reset(struct virtio_hw *hw)
 static uint8_t
 legacy_get_isr(struct virtio_hw *hw)
 {
-	return VIRTIO_READ_REG_1(hw, VIRTIO_PCI_ISR);
+	return hw->vtpci_access_ops->legacy_read8(hw,
+			(uint8_t *)VIRTIO_PCI_ISR);
 }
 
 /* Enable one vector (0) for Link State Intrerrupt */
 static uint16_t
 legacy_set_config_irq(struct virtio_hw *hw, uint16_t vec)
 {
-	VIRTIO_WRITE_REG_2(hw, VIRTIO_MSI_CONFIG_VECTOR, vec);
-	return VIRTIO_READ_REG_2(hw, VIRTIO_MSI_CONFIG_VECTOR);
+	hw->vtpci_access_ops->legacy_write16(hw,
+			(uint16_t *)VIRTIO_MSI_CONFIG_VECTOR, vec);
+	return hw->vtpci_access_ops->legacy_read16(hw,
+			(uint16_t *)VIRTIO_MSI_CONFIG_VECTOR);
 }
 
 static uint16_t
 legacy_get_queue_num(struct virtio_hw *hw, uint16_t queue_id)
 {
-	VIRTIO_WRITE_REG_2(hw, VIRTIO_PCI_QUEUE_SEL, queue_id);
-	return VIRTIO_READ_REG_2(hw, VIRTIO_PCI_QUEUE_NUM);
+	hw->vtpci_access_ops->legacy_write16(hw,
+			(uint16_t *)VIRTIO_PCI_QUEUE_SEL, queue_id);
+	return hw->vtpci_access_ops->legacy_read16(hw,
+			(uint16_t *)VIRTIO_PCI_QUEUE_NUM);
 }
 
 static void
 legacy_setup_queue(struct virtio_hw *hw, struct virtqueue *vq)
 {
-	VIRTIO_WRITE_REG_2(hw, VIRTIO_PCI_QUEUE_SEL, vq->vq_queue_index);
+	hw->vtpci_access_ops->legacy_write16(hw,
+			(uint16_t *)VIRTIO_PCI_QUEUE_SEL, vq->vq_queue_index);
 
-	VIRTIO_WRITE_REG_4(hw, VIRTIO_PCI_QUEUE_PFN,
-		vq->mz->phys_addr >> VIRTIO_PCI_QUEUE_ADDR_SHIFT);
+	hw->vtpci_access_ops->legacy_write32(hw,
+			(uint32_t *)VIRTIO_PCI_QUEUE_PFN,
+			vq->vq_ring_mem >> VIRTIO_PCI_QUEUE_ADDR_SHIFT);
 }
 
 static void
 legacy_del_queue(struct virtio_hw *hw, struct virtqueue *vq)
 {
-	VIRTIO_WRITE_REG_2(hw, VIRTIO_PCI_QUEUE_SEL, vq->vq_queue_index);
+	hw->vtpci_access_ops->legacy_write16(hw,
+			(uint16_t *)VIRTIO_PCI_QUEUE_SEL, vq->vq_queue_index);
 
-	VIRTIO_WRITE_REG_4(hw, VIRTIO_PCI_QUEUE_PFN, 0);
+	hw->vtpci_access_ops->legacy_write32(hw,
+			(uint32_t *)VIRTIO_PCI_QUEUE_PFN, 0);
 }
 
 static void
 legacy_notify_queue(struct virtio_hw *hw, struct virtqueue *vq)
 {
-	VIRTIO_WRITE_REG_2(hw, VIRTIO_PCI_QUEUE_NOTIFY, vq->vq_queue_index);
+	hw->vtpci_access_ops->legacy_write16(hw,
+			(uint16_t *)VIRTIO_PCI_QUEUE_NOTIFY,
+			vq->vq_queue_index);
 }
 
 #ifdef RTE_EXEC_ENV_LINUXAPP
@@ -468,35 +673,12 @@ static const struct virtio_pci_ops legacy_ops = {
 	.notify_queue	= legacy_notify_queue,
 };
 
-
-#define MODERN_READ_DEF(nr_bits, type)		\
-static inline type				\
-io_read##nr_bits(type *addr)			\
-{						\
-	return *(volatile type *)addr;		\
-}
-
-#define MODERN_WRITE_DEF(nr_bits, type)		\
-static inline void				\
-io_write##nr_bits(type *addr, type val)		\
-{						\
-	*(volatile type *)addr = val;		\
-}
-
-MODERN_READ_DEF (8, uint8_t)
-MODERN_WRITE_DEF(8, uint8_t)
-
-MODERN_READ_DEF (16, uint16_t)
-MODERN_WRITE_DEF(16, uint16_t)
-
-MODERN_READ_DEF (32, uint32_t)
-MODERN_WRITE_DEF(32, uint32_t)
-
 static inline void
-io_write64_twopart(uint32_t *lo, uint32_t *hi, uint64_t val)
+io_write64_twopart(struct virtio_hw *hw,
+		   uint32_t *lo, uint32_t *hi, uint64_t val)
 {
-	io_write32(lo, (uint32_t)val);
-	io_write32(hi, val >> 32);
+	hw->vtpci_access_ops->modern_write32(hw, lo, (uint32_t)val);
+	hw->vtpci_access_ops->modern_write32(hw, hi, val >> 32);
 }
 
 static void
@@ -508,13 +690,16 @@ modern_read_dev_config(struct virtio_hw *hw, uint64_t offset,
 	uint8_t old_gen, new_gen;
 
 	do {
-		old_gen = io_read8(&hw->common_cfg->config_generation);
+		old_gen = hw->vtpci_access_ops->modern_read8(hw,
+				&hw->common_cfg->config_generation);
 
 		p = dst;
 		for (i = 0;  i < length; i++)
-			*p++ = io_read8((uint8_t *)hw->dev_cfg + offset + i);
+			*p++ = hw->vtpci_access_ops->modern_read8(hw,
+					(uint8_t *)hw->dev_cfg + offset + i);
 
-		new_gen = io_read8(&hw->common_cfg->config_generation);
+		new_gen = hw->vtpci_access_ops->modern_read8(hw,
+				&hw->common_cfg->config_generation);
 	} while (old_gen != new_gen);
 }
 
@@ -526,7 +711,8 @@ modern_write_dev_config(struct virtio_hw *hw, uint64_t offset,
 	const uint8_t *p = src;
 
 	for (i = 0;  i < length; i++)
-		io_write8((uint8_t *)hw->dev_cfg + offset + i, *p++);
+		hw->vtpci_access_ops->modern_write8(hw,
+				(uint8_t *)hw->dev_cfg + offset + i, *p++);
 }
 
 static uint64_t
@@ -534,11 +720,15 @@ modern_get_features(struct virtio_hw *hw)
 {
 	uint32_t features_lo, features_hi;
 
-	io_write32(&hw->common_cfg->device_feature_select, 0);
-	features_lo = io_read32(&hw->common_cfg->device_feature);
+	hw->vtpci_access_ops->modern_write32(hw,
+			&hw->common_cfg->device_feature_select, 0);
+	features_lo = hw->vtpci_access_ops->modern_read32(hw,
+			&hw->common_cfg->device_feature);
 
-	io_write32(&hw->common_cfg->device_feature_select, 1);
-	features_hi = io_read32(&hw->common_cfg->device_feature);
+	hw->vtpci_access_ops->modern_write32(hw,
+			&hw->common_cfg->device_feature_select, 1);
+	features_hi = hw->vtpci_access_ops->modern_read32(hw,
+			&hw->common_cfg->device_feature);
 
 	return ((uint64_t)(features_hi) << 32) | features_lo;
 }
@@ -546,25 +736,30 @@ modern_get_features(struct virtio_hw *hw)
 static void
 modern_set_features(struct virtio_hw *hw, uint64_t features)
 {
-	io_write32(&hw->common_cfg->guest_feature_select, 0);
-	io_write32(&hw->common_cfg->guest_feature,
-		   features & ((1ULL << 32) - 1));
+	hw->vtpci_access_ops->modern_write32(hw,
+			&hw->common_cfg->guest_feature_select, 0);
+	hw->vtpci_access_ops->modern_write32(hw,
+			&hw->common_cfg->guest_feature,
+			features & ((1ULL << 32) - 1));
 
-	io_write32(&hw->common_cfg->guest_feature_select, 1);
-	io_write32(&hw->common_cfg->guest_feature,
-		   features >> 32);
+	hw->vtpci_access_ops->modern_write32(hw,
+			&hw->common_cfg->guest_feature_select, 1);
+	hw->vtpci_access_ops->modern_write32(hw,
+			&hw->common_cfg->guest_feature, features >> 32);
 }
 
 static uint8_t
 modern_get_status(struct virtio_hw *hw)
 {
-	return io_read8(&hw->common_cfg->device_status);
+	return hw->vtpci_access_ops->modern_read8(hw,
+			&hw->common_cfg->device_status);
 }
 
 static void
 modern_set_status(struct virtio_hw *hw, uint8_t status)
 {
-	io_write8(&hw->common_cfg->device_status, status);
+	hw->vtpci_access_ops->modern_write8(hw,
+			&hw->common_cfg->device_status, status);
 }
 
 static void
@@ -577,21 +772,25 @@ modern_reset(struct virtio_hw *hw)
 static uint8_t
 modern_get_isr(struct virtio_hw *hw)
 {
-	return io_read8(hw->isr);
+	return hw->vtpci_access_ops->modern_read8(hw, hw->isr);
 }
 
 static uint16_t
 modern_set_config_irq(struct virtio_hw *hw, uint16_t vec)
 {
-	io_write16(&hw->common_cfg->msix_config, vec);
-	return io_read16(&hw->common_cfg->msix_config);
+	hw->vtpci_access_ops->modern_write16(hw,
+			&hw->common_cfg->msix_config, vec);
+	return hw->vtpci_access_ops->modern_read16(hw,
+			&hw->common_cfg->msix_config);
 }
 
 static uint16_t
 modern_get_queue_num(struct virtio_hw *hw, uint16_t queue_id)
 {
-	io_write16(&hw->common_cfg->queue_select, queue_id);
-	return io_read16(&hw->common_cfg->queue_size);
+	hw->vtpci_access_ops->modern_write16(hw,
+			&hw->common_cfg->queue_select, queue_id);
+	return hw->vtpci_access_ops->modern_read16(hw,
+			&hw->common_cfg->queue_size);
 }
 
 static void
@@ -606,20 +805,23 @@ modern_setup_queue(struct virtio_hw *hw, struct virtqueue *vq)
 							 ring[vq->vq_nentries]),
 				   VIRTIO_PCI_VRING_ALIGN);
 
-	io_write16(&hw->common_cfg->queue_select, vq->vq_queue_index);
+	hw->vtpci_access_ops->modern_write16(hw,
+			&hw->common_cfg->queue_select, vq->vq_queue_index);
 
-	io_write64_twopart(&hw->common_cfg->queue_desc_lo,
+	io_write64_twopart(hw, &hw->common_cfg->queue_desc_lo,
 			   &hw->common_cfg->queue_desc_hi, desc_addr);
-	io_write64_twopart(&hw->common_cfg->queue_avail_lo,
+	io_write64_twopart(hw, &hw->common_cfg->queue_avail_lo,
 			   &hw->common_cfg->queue_avail_hi, avail_addr);
-	io_write64_twopart(&hw->common_cfg->queue_used_lo,
+	io_write64_twopart(hw, &hw->common_cfg->queue_used_lo,
 			   &hw->common_cfg->queue_used_hi, used_addr);
 
-	notify_off = io_read16(&hw->common_cfg->queue_notify_off);
+	notify_off = hw->vtpci_access_ops->modern_read16(hw,
+				&hw->common_cfg->queue_notify_off);
 	vq->notify_addr = (void *)((uint8_t *)hw->notify_base +
 				notify_off * hw->notify_off_multiplier);
 
-	io_write16(&hw->common_cfg->queue_enable, 1);
+	hw->vtpci_access_ops->modern_write16(hw,
+			&hw->common_cfg->queue_enable, 1);
 
 	PMD_INIT_LOG(DEBUG, "queue %u addresses:", vq->vq_queue_index);
 	PMD_INIT_LOG(DEBUG, "\t desc_addr: %"PRIx64, desc_addr);
@@ -632,22 +834,24 @@ modern_setup_queue(struct virtio_hw *hw, struct virtqueue *vq)
 static void
 modern_del_queue(struct virtio_hw *hw, struct virtqueue *vq)
 {
-	io_write16(&hw->common_cfg->queue_select, vq->vq_queue_index);
+	hw->vtpci_access_ops->modern_write16(hw,
+			&hw->common_cfg->queue_select, vq->vq_queue_index);
 
-	io_write64_twopart(&hw->common_cfg->queue_desc_lo,
+	io_write64_twopart(hw, &hw->common_cfg->queue_desc_lo,
 			   &hw->common_cfg->queue_desc_hi, 0);
-	io_write64_twopart(&hw->common_cfg->queue_avail_lo,
+	io_write64_twopart(hw, &hw->common_cfg->queue_avail_lo,
 			   &hw->common_cfg->queue_avail_hi, 0);
-	io_write64_twopart(&hw->common_cfg->queue_used_lo,
+	io_write64_twopart(hw, &hw->common_cfg->queue_used_lo,
 			   &hw->common_cfg->queue_used_hi, 0);
 
-	io_write16(&hw->common_cfg->queue_enable, 0);
+	hw->vtpci_access_ops->modern_write16(hw,
+			&hw->common_cfg->queue_enable, 0);
 }
 
 static void
 modern_notify_queue(struct virtio_hw *hw __rte_unused, struct virtqueue *vq)
 {
-	io_write16(vq->notify_addr, 1);
+	hw->vtpci_access_ops->modern_write16(hw, vq->notify_addr, 1);
 }
 
 static const struct virtio_pci_ops modern_ops = {
@@ -666,7 +870,6 @@ static const struct virtio_pci_ops modern_ops = {
 	.notify_queue	= modern_notify_queue,
 };
 
-
 void
 vtpci_read_dev_config(struct virtio_hw *hw, uint64_t offset,
 		      void *dst, int length)
@@ -739,55 +942,28 @@ vtpci_irq_config(struct virtio_hw *hw, uint16_t vec)
 	return hw->vtpci_ops->set_config_irq(hw, vec);
 }
 
-static void *
-get_cfg_addr(struct rte_pci_device *dev, struct virtio_pci_cap *cap)
-{
-	uint8_t  bar    = cap->bar;
-	uint32_t length = cap->length;
-	uint32_t offset = cap->offset;
-	uint8_t *base;
-
-	if (bar > 5) {
-		PMD_INIT_LOG(ERR, "invalid bar: %u", bar);
-		return NULL;
-	}
-
-	if (offset + length > dev->mem_resource[bar].len) {
-		PMD_INIT_LOG(ERR,
-			"invalid cap: overflows bar space: %u > %"PRIu64,
-			offset + length, dev->mem_resource[bar].len);
-		return NULL;
-	}
-
-	base = dev->mem_resource[bar].addr;
-	if (base == NULL) {
-		PMD_INIT_LOG(ERR, "bar %u base addr is NULL", bar);
-		return NULL;
-	}
-
-	return base + offset;
-}
-
 static int
-virtio_read_caps(struct rte_pci_device *dev, struct virtio_hw *hw)
+virtio_read_caps(struct virtio_hw *hw)
 {
 	uint8_t pos;
 	struct virtio_pci_cap cap;
 	int ret;
 
-	if (rte_eal_pci_map_device(dev) < 0) {
+	if (hw->vtpci_access_ops->map_pci_cfg(hw) < 0) {
 		PMD_INIT_LOG(DEBUG, "failed to map pci device!");
 		return -1;
 	}
 
-	ret = rte_eal_pci_read_config(dev, &pos, 1, PCI_CAPABILITY_LIST);
+	ret = hw->vtpci_access_ops->read_pci_cfg(
+			hw, &pos, 1, PCI_CAPABILITY_LIST);
 	if (ret < 0) {
 		PMD_INIT_LOG(DEBUG, "failed to read pci capability list");
 		return -1;
 	}
 
 	while (pos) {
-		ret = rte_eal_pci_read_config(dev, &cap, sizeof(cap), pos);
+		ret = hw->vtpci_access_ops->read_pci_cfg(
+				hw, &cap, sizeof(cap), pos);
 		if (ret < 0) {
 			PMD_INIT_LOG(ERR,
 				"failed to read pci cap at pos: %x", pos);
@@ -807,18 +983,22 @@ virtio_read_caps(struct rte_pci_device *dev, struct virtio_hw *hw)
 
 		switch (cap.cfg_type) {
 		case VIRTIO_PCI_CAP_COMMON_CFG:
-			hw->common_cfg = get_cfg_addr(dev, &cap);
+			hw->common_cfg =
+				hw->vtpci_access_ops->get_cfg_addr(hw, &cap);
 			break;
 		case VIRTIO_PCI_CAP_NOTIFY_CFG:
-			rte_eal_pci_read_config(dev, &hw->notify_off_multiplier,
+			hw->vtpci_access_ops->read_pci_cfg(hw,
+						&hw->notify_off_multiplier,
 						4, pos + sizeof(cap));
-			hw->notify_base = get_cfg_addr(dev, &cap);
+			hw->notify_base =
+				hw->vtpci_access_ops->get_cfg_addr(hw, &cap);
 			break;
 		case VIRTIO_PCI_CAP_DEVICE_CFG:
-			hw->dev_cfg = get_cfg_addr(dev, &cap);
+			hw->dev_cfg =
+				hw->vtpci_access_ops->get_cfg_addr(hw, &cap);
 			break;
 		case VIRTIO_PCI_CAP_ISR_CFG:
-			hw->isr = get_cfg_addr(dev, &cap);
+			hw->isr = hw->vtpci_access_ops->get_cfg_addr(hw, &cap);
 			break;
 		}
 
@@ -844,42 +1024,53 @@ virtio_read_caps(struct rte_pci_device *dev, struct virtio_hw *hw)
 }
 
 int
-vtpci_init(struct rte_pci_device *dev, struct virtio_hw *hw)
+vtpci_init(struct rte_eth_dev *eth_dev, struct virtio_hw *hw)
 {
-	hw->dev = dev;
+	struct rte_pci_device *pci_dev = eth_dev->pci_dev;
+
+	hw->dev = pci_dev;
+
+	if (pci_dev == NULL) {
+		PMD_INIT_LOG(INFO, "No pci device specified.");
+		return -1;
+	}
+
+	if (vtpci_access_init(eth_dev, hw) < 0)
+		return -1;
 
 	/*
 	 * Try if we can succeed reading virtio pci caps, which exists
 	 * only on modern pci device. If failed, we fallback to legacy
 	 * virtio handling.
 	 */
-	if (virtio_read_caps(dev, hw) == 0) {
+	if (virtio_read_caps(hw) == 0) {
 		PMD_INIT_LOG(INFO, "modern virtio pci detected.");
 		hw->vtpci_ops = &modern_ops;
 		hw->modern    = 1;
-		dev->driver->drv_flags |= RTE_PCI_DRV_INTR_LSC;
+		pci_dev->driver->drv_flags |= RTE_PCI_DRV_INTR_LSC;
 		return 0;
 	}
 
 	PMD_INIT_LOG(INFO, "trying with legacy virtio pci.");
-	if (legacy_virtio_resource_init(dev) < 0)
+	if (legacy_virtio_resource_init(pci_dev) < 0)
 		return -1;
 
 	hw->vtpci_ops = &legacy_ops;
-	hw->use_msix = legacy_virtio_has_msix(&dev->addr);
-	hw->io_base  = (uint32_t)(uintptr_t)dev->mem_resource[0].addr;
+	hw->use_msix = legacy_virtio_has_msix(&pci_dev->addr);
+	hw->io_base  = (uint32_t)(uintptr_t)pci_dev->mem_resource[0].addr;
 	hw->modern   = 0;
 
 	return 0;
 }
 
 void
-vtpci_uninit(struct rte_pci_device *dev, struct virtio_hw *hw)
+vtpci_uninit(struct rte_eth_dev *eth_dev, struct virtio_hw *hw)
 {
 	hw->dev  = NULL;
 	hw->vtpci_ops = NULL;
 	hw->use_msix = 0;
 	hw->io_base  = 0;
 	hw->modern   = 0;
-	rte_eal_pci_unmap_device(dev);
+	hw->vtpci_access_ops->unmap_pci_cfg(hw);
+	vtpci_access_uninit(eth_dev, hw);
 }
diff --git a/drivers/net/virtio/virtio_pci.h b/drivers/net/virtio/virtio_pci.h
index ec1d46b..b102679 100644
--- a/drivers/net/virtio/virtio_pci.h
+++ b/drivers/net/virtio/virtio_pci.h
@@ -222,6 +222,35 @@ struct virtio_pci_common_cfg {
 
 struct virtio_hw;
 
+struct virtio_pci_access_ops {
+	uint8_t (*legacy_read8)(struct virtio_hw *hw, uint8_t *addr);
+	uint16_t (*legacy_read16)(struct virtio_hw *hw, uint16_t *addr);
+	uint32_t (*legacy_read32)(struct virtio_hw *hw, uint32_t *addr);
+	void (*legacy_write8)(struct virtio_hw *hw,
+				uint8_t *addr, uint8_t val);
+	void (*legacy_write16)(struct virtio_hw *hw,
+				uint16_t *addr, uint16_t val);
+	void (*legacy_write32)(struct virtio_hw *hw,
+				uint32_t *addr, uint32_t val);
+
+	uint8_t (*modern_read8)(struct virtio_hw *hw, uint8_t *addr);
+	uint16_t (*modern_read16)(struct virtio_hw *hw, uint16_t *addr);
+	uint32_t (*modern_read32)(struct virtio_hw *hw, uint32_t *addr);
+	void (*modern_write8)(struct virtio_hw *hw,
+				uint8_t *addr, uint8_t val);
+	void (*modern_write16)(struct virtio_hw *hw,
+				uint16_t *addr, uint16_t val);
+	void (*modern_write32)(struct virtio_hw *hw,
+				uint32_t *addr, uint32_t val);
+
+	int (*map_pci_cfg)(struct virtio_hw *hw);
+	void (*unmap_pci_cfg)(struct virtio_hw *hw);
+	void *(*get_cfg_addr)(struct virtio_hw *hw,
+				struct virtio_pci_cap *cap);
+	int (*read_pci_cfg)(struct virtio_hw *hw,
+				void *buf, size_t len, off_t offset);
+};
+
 struct virtio_pci_ops {
 	void (*read_dev_cfg)(struct virtio_hw *hw, uint64_t offset,
 			     void *dst, int len);
@@ -266,6 +295,7 @@ struct virtio_hw {
 	struct virtio_pci_common_cfg *common_cfg;
 	struct virtio_net_config *dev_cfg;
 	const struct virtio_pci_ops *vtpci_ops;
+	const struct virtio_pci_access_ops *vtpci_access_ops;
 };
 
 /*
@@ -327,8 +357,8 @@ vtpci_with_feature(struct virtio_hw *hw, uint64_t bit)
 /*
  * Function declaration from virtio_pci.c
  */
-int vtpci_init(struct rte_pci_device *, struct virtio_hw *);
-void vtpci_uninit(struct rte_pci_device *dev, struct virtio_hw *);
+int vtpci_init(struct rte_eth_dev *, struct virtio_hw *);
+void vtpci_uninit(struct rte_eth_dev *, struct virtio_hw *);
 void vtpci_reset(struct virtio_hw *);
 
 void vtpci_reinit_complete(struct virtio_hw *);
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 120+ messages in thread

* Re: [PATCH 0/3] virtio: Add a new layer to abstract pci access method
  2016-01-18  9:13 [PATCH 0/3] virtio: Add a new layer to abstract pci access method Tetsuya Mukawa
                   ` (2 preceding siblings ...)
  2016-01-18  9:13 ` [PATCH 3/3] virtio: Add a new layer to abstract pci access method Tetsuya Mukawa
@ 2016-01-18 13:13 ` Tan, Jianfeng
  2016-01-19  1:22   ` Tetsuya Mukawa
  3 siblings, 1 reply; 120+ messages in thread
From: Tan, Jianfeng @ 2016-01-18 13:13 UTC (permalink / raw)
  To: Tetsuya Mukawa, dev

Hi Tetsuya,

On 1/18/2016 5:13 PM, Tetsuya Mukawa wrote:
> The patches abstract pci access method of virtio-net PMD.
> The patch should be on Yuanhan's below patch series.
>   - [PATCH v4 0/8] virtio 1.0 enabling for virtio pmd driver
>
>
> Tetsuya Mukawa (3):
>    virtio: Change the parameter order of io_write8/16/32()
>    virtio: move rte_eal_pci_unmap_device() to virtio_pci.c
>    virtio: Add a new layer to abstract pci access method
>
>   drivers/net/virtio/virtio_ethdev.c |   4 +-
>   drivers/net/virtio/virtio_pci.c    | 468 ++++++++++++++++++++++++++-----------
>   drivers/net/virtio/virtio_pci.h    |  33 ++-
>   3 files changed, 369 insertions(+), 136 deletions(-)
>

Now I believe we will become more clear about the difference of our two 
implementations.

I was planning to just implement another struct virtio_pci_ops because 
it's going the long way round for my implementation to translate 
virtio_pci_ops to ioport/pci configuration space rd/wr then back to 
sendmsg/ioctl. And in my implementation, there's no need to 
differentiate legacy/modern device.

As I understand, your implementation does not need another 
implementation of struct virtio_pci_ops, but you need different 
implementation in lower layer as this patch show. You want to support 
both legacy/modern device, right?

By the way, this patch looks good to me.

Thanks,
Jianfeng

^ permalink raw reply	[flat|nested] 120+ messages in thread

* Re: [PATCH 3/3] virtio: Add a new layer to abstract pci access method
  2016-01-18  9:13 ` [PATCH 3/3] virtio: Add a new layer to abstract pci access method Tetsuya Mukawa
@ 2016-01-18 13:46   ` Yuanhan Liu
  2016-01-19  1:22     ` Tetsuya Mukawa
  2016-01-19  2:41     ` Xie, Huawei
  0 siblings, 2 replies; 120+ messages in thread
From: Yuanhan Liu @ 2016-01-18 13:46 UTC (permalink / raw)
  To: Tetsuya Mukawa; +Cc: dev

On Mon, Jan 18, 2016 at 06:13:09PM +0900, Tetsuya Mukawa wrote:
> +struct virtio_pci_access_ops {
> +	uint8_t (*legacy_read8)(struct virtio_hw *hw, uint8_t *addr);
> +	uint16_t (*legacy_read16)(struct virtio_hw *hw, uint16_t *addr);
> +	uint32_t (*legacy_read32)(struct virtio_hw *hw, uint32_t *addr);
> +	void (*legacy_write8)(struct virtio_hw *hw,
> +				uint8_t *addr, uint8_t val);
> +	void (*legacy_write16)(struct virtio_hw *hw,
> +				uint16_t *addr, uint16_t val);
> +	void (*legacy_write32)(struct virtio_hw *hw,
> +				uint32_t *addr, uint32_t val);
> +
> +	uint8_t (*modern_read8)(struct virtio_hw *hw, uint8_t *addr);
> +	uint16_t (*modern_read16)(struct virtio_hw *hw, uint16_t *addr);
> +	uint32_t (*modern_read32)(struct virtio_hw *hw, uint32_t *addr);
> +	void (*modern_write8)(struct virtio_hw *hw,
> +				uint8_t *addr, uint8_t val);
> +	void (*modern_write16)(struct virtio_hw *hw,
> +				uint16_t *addr, uint16_t val);
> +	void (*modern_write32)(struct virtio_hw *hw,
> +				uint32_t *addr, uint32_t val);

One thing about abstraction is that you need define one set of operations,
instead of two similar sets. Thus, you need define following operations
only:

  - read8
  - read16
  - read32
  - write8
  - write16
  - write32

And make a proper assignment after the modern/legacy detection.

> +
> +	int (*map_pci_cfg)(struct virtio_hw *hw);
> +	void (*unmap_pci_cfg)(struct virtio_hw *hw);
> +	void *(*get_cfg_addr)(struct virtio_hw *hw,
> +				struct virtio_pci_cap *cap);
> +	int (*read_pci_cfg)(struct virtio_hw *hw,
> +				void *buf, size_t len, off_t offset);

It'd be good if you can post the patches that use above abstract
operations, so that people can tell if they are properly defined.

	--yliu

^ permalink raw reply	[flat|nested] 120+ messages in thread

* Re: [PATCH 0/3] virtio: Add a new layer to abstract pci access method
  2016-01-18 13:13 ` [PATCH 0/3] " Tan, Jianfeng
@ 2016-01-19  1:22   ` Tetsuya Mukawa
  0 siblings, 0 replies; 120+ messages in thread
From: Tetsuya Mukawa @ 2016-01-19  1:22 UTC (permalink / raw)
  To: Tan, Jianfeng, dev

On 2016/01/18 22:13, Tan, Jianfeng wrote:
> Hi Tetsuya,
>
> On 1/18/2016 5:13 PM, Tetsuya Mukawa wrote:
>> The patches abstract pci access method of virtio-net PMD.
>> The patch should be on Yuanhan's below patch series.
>>   - [PATCH v4 0/8] virtio 1.0 enabling for virtio pmd driver
>>
>>
>> Tetsuya Mukawa (3):
>>    virtio: Change the parameter order of io_write8/16/32()
>>    virtio: move rte_eal_pci_unmap_device() to virtio_pci.c
>>    virtio: Add a new layer to abstract pci access method
>>
>>   drivers/net/virtio/virtio_ethdev.c |   4 +-
>>   drivers/net/virtio/virtio_pci.c    | 468
>> ++++++++++++++++++++++++++-----------
>>   drivers/net/virtio/virtio_pci.h    |  33 ++-
>>   3 files changed, 369 insertions(+), 136 deletions(-)
>>
>
> Now I believe we will become more clear about the difference of our
> two implementations.
>
> I was planning to just implement another struct virtio_pci_ops because
> it's going the long way round for my implementation to translate
> virtio_pci_ops to ioport/pci configuration space rd/wr then back to
> sendmsg/ioctl. And in my implementation, there's no need to
> differentiate legacy/modern device.
>
> As I understand, your implementation does not need another
> implementation of struct virtio_pci_ops, but you need different
> implementation in lower layer as this patch show. You want to support
> both legacy/modern device, right?

Thanks for comments.
Yes, I want to support both legacy and modern virtio devices.
I will rebase my container patches on this abstraction, then submit it
again.

Thanks,
Tetsuya

^ permalink raw reply	[flat|nested] 120+ messages in thread

* Re: [PATCH 3/3] virtio: Add a new layer to abstract pci access method
  2016-01-18 13:46   ` Yuanhan Liu
@ 2016-01-19  1:22     ` Tetsuya Mukawa
  2016-01-19  2:41     ` Xie, Huawei
  1 sibling, 0 replies; 120+ messages in thread
From: Tetsuya Mukawa @ 2016-01-19  1:22 UTC (permalink / raw)
  To: Yuanhan Liu; +Cc: dev

On 2016/01/18 22:46, Yuanhan Liu wrote:
> On Mon, Jan 18, 2016 at 06:13:09PM +0900, Tetsuya Mukawa wrote:
>> +struct virtio_pci_access_ops {
>> +	uint8_t (*legacy_read8)(struct virtio_hw *hw, uint8_t *addr);
>> +	uint16_t (*legacy_read16)(struct virtio_hw *hw, uint16_t *addr);
>> +	uint32_t (*legacy_read32)(struct virtio_hw *hw, uint32_t *addr);
>> +	void (*legacy_write8)(struct virtio_hw *hw,
>> +				uint8_t *addr, uint8_t val);
>> +	void (*legacy_write16)(struct virtio_hw *hw,
>> +				uint16_t *addr, uint16_t val);
>> +	void (*legacy_write32)(struct virtio_hw *hw,
>> +				uint32_t *addr, uint32_t val);
>> +
>> +	uint8_t (*modern_read8)(struct virtio_hw *hw, uint8_t *addr);
>> +	uint16_t (*modern_read16)(struct virtio_hw *hw, uint16_t *addr);
>> +	uint32_t (*modern_read32)(struct virtio_hw *hw, uint32_t *addr);
>> +	void (*modern_write8)(struct virtio_hw *hw,
>> +				uint8_t *addr, uint8_t val);
>> +	void (*modern_write16)(struct virtio_hw *hw,
>> +				uint16_t *addr, uint16_t val);
>> +	void (*modern_write32)(struct virtio_hw *hw,
>> +				uint32_t *addr, uint32_t val);
> One thing about abstraction is that you need define one set of operations,
> instead of two similar sets. Thus, you need define following operations
> only:
>
>   - read8
>   - read16
>   - read32
>   - write8
>   - write16
>   - write32
>
> And make a proper assignment after the modern/legacy detection.

Thanks for comments.
Sure, will do.

>
>> +
>> +	int (*map_pci_cfg)(struct virtio_hw *hw);
>> +	void (*unmap_pci_cfg)(struct virtio_hw *hw);
>> +	void *(*get_cfg_addr)(struct virtio_hw *hw,
>> +				struct virtio_pci_cap *cap);
>> +	int (*read_pci_cfg)(struct virtio_hw *hw,
>> +				void *buf, size_t len, off_t offset);
> It'd be good if you can post the patches that use above abstract
> operations, so that people can tell if they are properly defined.
>

Yes, I will submit it again.

Thanks,
Tetsuya

^ permalink raw reply	[flat|nested] 120+ messages in thread

* Re: [PATCH 3/3] virtio: Add a new layer to abstract pci access method
  2016-01-18 13:46   ` Yuanhan Liu
  2016-01-19  1:22     ` Tetsuya Mukawa
@ 2016-01-19  2:41     ` Xie, Huawei
  1 sibling, 0 replies; 120+ messages in thread
From: Xie, Huawei @ 2016-01-19  2:41 UTC (permalink / raw)
  To: Yuanhan Liu, Tetsuya Mukawa; +Cc: dev

On 1/18/2016 9:44 PM, Yuanhan Liu wrote:
> On Mon, Jan 18, 2016 at 06:13:09PM +0900, Tetsuya Mukawa wrote:
>> +struct virtio_pci_access_ops {
>> +	uint8_t (*legacy_read8)(struct virtio_hw *hw, uint8_t *addr);
>> +	uint16_t (*legacy_read16)(struct virtio_hw *hw, uint16_t *addr);
>> +	uint32_t (*legacy_read32)(struct virtio_hw *hw, uint32_t *addr);
>> +	void (*legacy_write8)(struct virtio_hw *hw,
>> +				uint8_t *addr, uint8_t val);
>> +	void (*legacy_write16)(struct virtio_hw *hw,
>> +				uint16_t *addr, uint16_t val);
>> +	void (*legacy_write32)(struct virtio_hw *hw,
>> +				uint32_t *addr, uint32_t val);
>> +
>> +	uint8_t (*modern_read8)(struct virtio_hw *hw, uint8_t *addr);
>> +	uint16_t (*modern_read16)(struct virtio_hw *hw, uint16_t *addr);
>> +	uint32_t (*modern_read32)(struct virtio_hw *hw, uint32_t *addr);
>> +	void (*modern_write8)(struct virtio_hw *hw,
>> +				uint8_t *addr, uint8_t val);
>> +	void (*modern_write16)(struct virtio_hw *hw,
>> +				uint16_t *addr, uint16_t val);
>> +	void (*modern_write32)(struct virtio_hw *hw,
>> +				uint32_t *addr, uint32_t val);
> One thing about abstraction is that you need define one set of operations,
> instead of two similar sets. Thus, you need define following operations
> only:
>
>   - read8
>   - read16
>   - read32
>   - write8
>   - write16
>   - write32
>
> And make a proper assignment after the modern/legacy detection.

yes, that is how ops abstraction works.

>
>> +
>> +	int (*map_pci_cfg)(struct virtio_hw *hw);
>> +	void (*unmap_pci_cfg)(struct virtio_hw *hw);
>> +	void *(*get_cfg_addr)(struct virtio_hw *hw,
>> +				struct virtio_pci_cap *cap);
>> +	int (*read_pci_cfg)(struct virtio_hw *hw,
>> +				void *buf, size_t len, off_t offset);
> It'd be good if you can post the patches that use above abstract
> operations, so that people can tell if they are properly defined.
>
> 	--yliu
>


^ permalink raw reply	[flat|nested] 120+ messages in thread

* [RFC PATCH 0/5] virtio: Add a new layer to abstract pci access method
  2016-01-18  9:13 ` [PATCH 1/3] virtio: Change the parameter order of io_write8/16/32() Tetsuya Mukawa
@ 2016-01-21 11:07   ` Tetsuya Mukawa
  2016-01-21 11:10     ` Tetsuya Mukawa
  2016-01-21 11:07   ` [RFC PATCH 1/5] virtio: Change the parameter order of io_write8/16/32() Tetsuya Mukawa
                     ` (8 subsequent siblings)
  9 siblings, 1 reply; 120+ messages in thread
From: Tetsuya Mukawa @ 2016-01-21 11:07 UTC (permalink / raw)
  To: dev, yuanhan.liu, jianfeng.tan

This patch series are not for upstreaming.

It describe how to use a new access method abstraction of "virtio-pci.c".
Because of this, some patches are not for upstreaming.

For example, below changes will be shared with Jianfeng's patches.
So these changes are just temporary.
 - "--shm" option to allocate EAL memory.
 - Some changes to access to EAL memory by virtual address.

Anyway, some changes are not for upstreaming, but virtual virtio-net PMD
should work with QEMU as described in commit log.

Tetsuya Mukawa (5):
  virtio: Change the parameter order of io_write8/16/32()
  virtio: move rte_eal_pci_unmap_device() to virtio_pci.c
  virtio: Add a new layer to abstract pci access method
  EAL: Add new EAL "--shm" option.
  virtio: Extend virtio-net PMD to support container environment

 config/common_linuxapp                     |    1 +
 drivers/net/virtio/Makefile                |    4 +
 drivers/net/virtio/qtest.c                 | 1237 ++++++++++++++++++++++++++++
 drivers/net/virtio/virtio_ethdev.c         |  454 ++++++++--
 drivers/net/virtio/virtio_ethdev.h         |   12 +
 drivers/net/virtio/virtio_pci.c            |  732 ++++++++++++----
 drivers/net/virtio/virtio_pci.h            |   39 +-
 drivers/net/virtio/virtio_rxtx.c           |    3 +-
 drivers/net/virtio/virtqueue.h             |    9 +-
 lib/librte_eal/common/eal_common_options.c |    5 +
 lib/librte_eal/common/eal_internal_cfg.h   |    1 +
 lib/librte_eal/common/eal_options.h        |    2 +
 lib/librte_eal/common/include/rte_memory.h |    5 +
 lib/librte_eal/linuxapp/eal/eal_memory.c   |   76 ++
 14 files changed, 2337 insertions(+), 243 deletions(-)
 create mode 100644 drivers/net/virtio/qtest.c

-- 
2.1.4

^ permalink raw reply	[flat|nested] 120+ messages in thread

* [RFC PATCH 1/5] virtio: Change the parameter order of io_write8/16/32()
  2016-01-18  9:13 ` [PATCH 1/3] virtio: Change the parameter order of io_write8/16/32() Tetsuya Mukawa
  2016-01-21 11:07   ` [RFC PATCH 0/5] virtio: Add a new layer to abstract pci access method Tetsuya Mukawa
@ 2016-01-21 11:07   ` Tetsuya Mukawa
  2016-01-21 11:07   ` [RFC PATCH 2/5] virtio: move rte_eal_pci_unmap_device() to virtio_pci.c Tetsuya Mukawa
                     ` (7 subsequent siblings)
  9 siblings, 0 replies; 120+ messages in thread
From: Tetsuya Mukawa @ 2016-01-21 11:07 UTC (permalink / raw)
  To: dev, yuanhan.liu, jianfeng.tan

The patch change the parameter order of below functions.
 - io_write8()
 - io_write16()
 - io_write32()
This changig are needed to add a new layer to abstract accessing
method.

Signed-off-by: Tetsuya Mukawa <mukawa@igel.co.jp>
---
 drivers/net/virtio/virtio_pci.c | 66 ++++++++++++++++++++---------------------
 1 file changed, 33 insertions(+), 33 deletions(-)

diff --git a/drivers/net/virtio/virtio_pci.c b/drivers/net/virtio/virtio_pci.c
index 6b87429..0aeffb7 100644
--- a/drivers/net/virtio/virtio_pci.c
+++ b/drivers/net/virtio/virtio_pci.c
@@ -478,7 +478,7 @@ io_read##nr_bits(type *addr)			\
 
 #define IO_WRITE_DEF(nr_bits, type)		\
 static inline void				\
-io_write##nr_bits(type val, type *addr)		\
+io_write##nr_bits(type *addr, type val)		\
 {						\
 	*(volatile type *)addr = val;		\
 }
@@ -493,10 +493,10 @@ IO_READ_DEF (32, uint32_t)
 IO_WRITE_DEF(32, uint32_t)
 
 static inline void
-io_write64_twopart(uint64_t val, uint32_t *lo, uint32_t *hi)
+io_write64_twopart(uint32_t *lo, uint32_t *hi, uint64_t val)
 {
-	io_write32(val & ((1ULL << 32) - 1), lo);
-	io_write32(val >> 32,		     hi);
+	io_write32(lo, val & ((1ULL << 32) - 1));
+	io_write32(hi, val >> 32);
 }
 
 static void
@@ -526,7 +526,7 @@ modern_write_dev_config(struct virtio_hw *hw, size_t offset,
 	const uint8_t *p = src;
 
 	for (i = 0;  i < length; i++)
-		io_write8(*p++, (uint8_t *)hw->dev_cfg + offset + i);
+		io_write8((uint8_t *)hw->dev_cfg + offset + i, *p++);
 }
 
 static uint64_t
@@ -534,10 +534,10 @@ modern_get_features(struct virtio_hw *hw)
 {
 	uint32_t features_lo, features_hi;
 
-	io_write32(0, &hw->common_cfg->device_feature_select);
+	io_write32(&hw->common_cfg->device_feature_select, 0);
 	features_lo = io_read32(&hw->common_cfg->device_feature);
 
-	io_write32(1, &hw->common_cfg->device_feature_select);
+	io_write32(&hw->common_cfg->device_feature_select, 1);
 	features_hi = io_read32(&hw->common_cfg->device_feature);
 
 	return ((uint64_t)features_hi << 32) | features_lo;
@@ -546,13 +546,13 @@ modern_get_features(struct virtio_hw *hw)
 static void
 modern_set_features(struct virtio_hw *hw, uint64_t features)
 {
-	io_write32(0, &hw->common_cfg->guest_feature_select);
-	io_write32(features & ((1ULL << 32) - 1),
-		&hw->common_cfg->guest_feature);
+	io_write32(&hw->common_cfg->guest_feature_select, 0);
+	io_write32(&hw->common_cfg->guest_feature,
+		   features & ((1ULL << 32) - 1));
 
-	io_write32(1, &hw->common_cfg->guest_feature_select);
-	io_write32(features >> 32,
-		&hw->common_cfg->guest_feature);
+	io_write32(&hw->common_cfg->guest_feature_select, 1);
+	io_write32(&hw->common_cfg->guest_feature,
+		   features >> 32);
 }
 
 static uint8_t
@@ -564,7 +564,7 @@ modern_get_status(struct virtio_hw *hw)
 static void
 modern_set_status(struct virtio_hw *hw, uint8_t status)
 {
-	io_write8(status, &hw->common_cfg->device_status);
+	io_write8(&hw->common_cfg->device_status, status);
 }
 
 static void
@@ -583,14 +583,14 @@ modern_get_isr(struct virtio_hw *hw)
 static uint16_t
 modern_set_config_irq(struct virtio_hw *hw, uint16_t vec)
 {
-	io_write16(vec, &hw->common_cfg->msix_config);
+	io_write16(&hw->common_cfg->msix_config, vec);
 	return io_read16(&hw->common_cfg->msix_config);
 }
 
 static uint16_t
 modern_get_queue_num(struct virtio_hw *hw, uint16_t queue_id)
 {
-	io_write16(queue_id, &hw->common_cfg->queue_select);
+	io_write16(&hw->common_cfg->queue_select, queue_id);
 	return io_read16(&hw->common_cfg->queue_size);
 }
 
@@ -606,20 +606,20 @@ modern_setup_queue(struct virtio_hw *hw, struct virtqueue *vq)
 							 ring[vq->vq_nentries]),
 				   VIRTIO_PCI_VRING_ALIGN);
 
-	io_write16(vq->vq_queue_index, &hw->common_cfg->queue_select);
+	io_write16(&hw->common_cfg->queue_select, vq->vq_queue_index);
 
-	io_write64_twopart(desc_addr, &hw->common_cfg->queue_desc_lo,
-				      &hw->common_cfg->queue_desc_hi);
-	io_write64_twopart(avail_addr, &hw->common_cfg->queue_avail_lo,
-				       &hw->common_cfg->queue_avail_hi);
-	io_write64_twopart(used_addr, &hw->common_cfg->queue_used_lo,
-				      &hw->common_cfg->queue_used_hi);
+	io_write64_twopart(&hw->common_cfg->queue_desc_lo,
+			   &hw->common_cfg->queue_desc_hi, desc_addr);
+	io_write64_twopart(&hw->common_cfg->queue_avail_lo,
+			   &hw->common_cfg->queue_avail_hi, avail_addr);
+	io_write64_twopart(&hw->common_cfg->queue_used_lo,
+			   &hw->common_cfg->queue_used_hi, used_addr);
 
 	notify_off = io_read16(&hw->common_cfg->queue_notify_off);
 	vq->notify_addr = (void *)((uint8_t *)hw->notify_base +
 				notify_off * hw->notify_off_multiplier);
 
-	io_write16(1, &hw->common_cfg->queue_enable);
+	io_write16(&hw->common_cfg->queue_enable, 1);
 
 	PMD_INIT_LOG(DEBUG, "queue %u addresses:", vq->vq_queue_index);
 	PMD_INIT_LOG(DEBUG, "\t desc_addr: %"PRIx64, desc_addr);
@@ -632,22 +632,22 @@ modern_setup_queue(struct virtio_hw *hw, struct virtqueue *vq)
 static void
 modern_del_queue(struct virtio_hw *hw, struct virtqueue *vq)
 {
-	io_write16(vq->vq_queue_index, &hw->common_cfg->queue_select);
+	io_write16(&hw->common_cfg->queue_select, vq->vq_queue_index);
 
-	io_write64_twopart(0, &hw->common_cfg->queue_desc_lo,
-				  &hw->common_cfg->queue_desc_hi);
-	io_write64_twopart(0, &hw->common_cfg->queue_avail_lo,
-				  &hw->common_cfg->queue_avail_hi);
-	io_write64_twopart(0, &hw->common_cfg->queue_used_lo,
-				  &hw->common_cfg->queue_used_hi);
+	io_write64_twopart(&hw->common_cfg->queue_desc_lo,
+			   &hw->common_cfg->queue_desc_hi, 0);
+	io_write64_twopart(&hw->common_cfg->queue_avail_lo,
+			   &hw->common_cfg->queue_avail_hi, 0);
+	io_write64_twopart(&hw->common_cfg->queue_used_lo,
+			   &hw->common_cfg->queue_used_hi, 0);
 
-	io_write16(0, &hw->common_cfg->queue_enable);
+	io_write16(&hw->common_cfg->queue_enable, 0);
 }
 
 static void
 modern_notify_queue(struct virtio_hw *hw __rte_unused, struct virtqueue *vq)
 {
-	io_write16(1, vq->notify_addr);
+	io_write16(vq->notify_addr, 1);
 }
 
 static const struct virtio_pci_ops modern_ops = {
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 120+ messages in thread

* [RFC PATCH 2/5] virtio: move rte_eal_pci_unmap_device() to virtio_pci.c
  2016-01-18  9:13 ` [PATCH 1/3] virtio: Change the parameter order of io_write8/16/32() Tetsuya Mukawa
  2016-01-21 11:07   ` [RFC PATCH 0/5] virtio: Add a new layer to abstract pci access method Tetsuya Mukawa
  2016-01-21 11:07   ` [RFC PATCH 1/5] virtio: Change the parameter order of io_write8/16/32() Tetsuya Mukawa
@ 2016-01-21 11:07   ` Tetsuya Mukawa
  2016-01-21 11:07   ` [RFC PATCH 3/5] virtio: Add a new layer to abstract pci access method Tetsuya Mukawa
                     ` (6 subsequent siblings)
  9 siblings, 0 replies; 120+ messages in thread
From: Tetsuya Mukawa @ 2016-01-21 11:07 UTC (permalink / raw)
  To: dev, yuanhan.liu, jianfeng.tan

To abstract pci access method, the patch moves below function
to "virtio_pci.c".
 - rte_eal_pci_unmap_device()

Signed-off-by: Tetsuya Mukawa <mukawa@igel.co.jp>
---
 drivers/net/virtio/virtio_ethdev.c |  2 +-
 drivers/net/virtio/virtio_pci.c    | 11 +++++++++++
 drivers/net/virtio/virtio_pci.h    |  1 +
 3 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c
index deb0382..37833a8 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -1177,7 +1177,7 @@ eth_virtio_dev_uninit(struct rte_eth_dev *eth_dev)
 		rte_intr_callback_unregister(&pci_dev->intr_handle,
 						virtio_interrupt_handler,
 						eth_dev);
-	rte_eal_pci_unmap_device(pci_dev);
+	vtpci_uninit(pci_dev, hw);
 
 	PMD_INIT_LOG(DEBUG, "dev_uninit completed");
 
diff --git a/drivers/net/virtio/virtio_pci.c b/drivers/net/virtio/virtio_pci.c
index 0aeffb7..7d7ef06 100644
--- a/drivers/net/virtio/virtio_pci.c
+++ b/drivers/net/virtio/virtio_pci.c
@@ -878,3 +878,14 @@ vtpci_init(struct rte_pci_device *dev, struct virtio_hw *hw)
 
 	return 0;
 }
+
+void
+vtpci_uninit(struct rte_pci_device *dev, struct virtio_hw *hw)
+{
+	hw->dev  = NULL;
+	hw->vtpci_ops = NULL;
+	hw->use_msix = 0;
+	hw->io_base  = 0;
+	hw->modern   = 0;
+	rte_eal_pci_unmap_device(dev);
+}
diff --git a/drivers/net/virtio/virtio_pci.h b/drivers/net/virtio/virtio_pci.h
index 0544a07..17c7972 100644
--- a/drivers/net/virtio/virtio_pci.h
+++ b/drivers/net/virtio/virtio_pci.h
@@ -328,6 +328,7 @@ vtpci_with_feature(struct virtio_hw *hw, uint64_t bit)
  * Function declaration from virtio_pci.c
  */
 int vtpci_init(struct rte_pci_device *, struct virtio_hw *);
+void vtpci_uninit(struct rte_pci_device *dev, struct virtio_hw *);
 void vtpci_reset(struct virtio_hw *);
 
 void vtpci_reinit_complete(struct virtio_hw *);
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 120+ messages in thread

* [RFC PATCH 3/5] virtio: Add a new layer to abstract pci access method
  2016-01-18  9:13 ` [PATCH 1/3] virtio: Change the parameter order of io_write8/16/32() Tetsuya Mukawa
                     ` (2 preceding siblings ...)
  2016-01-21 11:07   ` [RFC PATCH 2/5] virtio: move rte_eal_pci_unmap_device() to virtio_pci.c Tetsuya Mukawa
@ 2016-01-21 11:07   ` Tetsuya Mukawa
  2016-01-22  7:26     ` Xie, Huawei
  2016-01-21 11:07   ` [RFC PATCH 4/5] EAL: Add new EAL "--shm" option Tetsuya Mukawa
                     ` (5 subsequent siblings)
  9 siblings, 1 reply; 120+ messages in thread
From: Tetsuya Mukawa @ 2016-01-21 11:07 UTC (permalink / raw)
  To: dev, yuanhan.liu, jianfeng.tan

This patch addss function pointers to abstract pci access method.
This abstraction layer will be used when virtio-net PMD supports
container extension.

The below functions abstract how to access to pci configuration space.

struct virtio_pci_cfg_ops {
        int   (*map)(...);
        void  (*unmap)(...);
        void *(*get_mapped_addr)(...);
        int   (*read)(...);
};

The pci configuration space has information how to access to virtio
device registers. Basically, there are 2 ways to acccess to the
registers. One is using portio and the other is using mapped memory.
The below functions abstract this access method.

struct virtio_pci_dev_ops {
        uint8_t  (*read8)(...);
        uint16_t (*read16)(...);
        uint32_t (*read32)(...);
        void     (*write8)(...);
        void     (*write16)(...);
        void     (*write32)(...);
};

Signed-off-by: Tetsuya Mukawa <mukawa@igel.co.jp>
---
 drivers/net/virtio/virtio_ethdev.c |   4 +-
 drivers/net/virtio/virtio_pci.c    | 519 ++++++++++++++++++++++++++-----------
 drivers/net/virtio/virtio_pci.h    |  24 +-
 3 files changed, 386 insertions(+), 161 deletions(-)

diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c
index 37833a8..c477b05 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -1037,7 +1037,7 @@ eth_virtio_dev_init(struct rte_eth_dev *eth_dev)
 
 	pci_dev = eth_dev->pci_dev;
 
-	if (vtpci_init(pci_dev, hw) < 0)
+	if (vtpci_init(eth_dev, hw) < 0)
 		return -1;
 
 	/* Reset the device although not necessary at startup */
@@ -1177,7 +1177,7 @@ eth_virtio_dev_uninit(struct rte_eth_dev *eth_dev)
 		rte_intr_callback_unregister(&pci_dev->intr_handle,
 						virtio_interrupt_handler,
 						eth_dev);
-	vtpci_uninit(pci_dev, hw);
+	vtpci_uninit(eth_dev, hw);
 
 	PMD_INIT_LOG(DEBUG, "dev_uninit completed");
 
diff --git a/drivers/net/virtio/virtio_pci.c b/drivers/net/virtio/virtio_pci.c
index 7d7ef06..98eef85 100644
--- a/drivers/net/virtio/virtio_pci.c
+++ b/drivers/net/virtio/virtio_pci.c
@@ -49,24 +49,198 @@
 #define PCI_CAPABILITY_LIST	0x34
 #define PCI_CAP_ID_VNDR		0x09
 
+static uint8_t
+phys_legacy_read8(struct virtio_hw *hw, uint8_t *addr)
+{
+	return inb((unsigned short)(hw->io_base + (uint64_t)addr));
+}
 
-#define VIRTIO_PCI_REG_ADDR(hw, reg) \
-	(unsigned short)((hw)->io_base + (reg))
+static uint16_t
+phys_legacy_read16(struct virtio_hw *hw, uint16_t *addr)
+{
+	return inw((unsigned short)(hw->io_base + (uint64_t)addr));
+}
 
-#define VIRTIO_READ_REG_1(hw, reg) \
-	inb((VIRTIO_PCI_REG_ADDR((hw), (reg))))
-#define VIRTIO_WRITE_REG_1(hw, reg, value) \
-	outb_p((unsigned char)(value), (VIRTIO_PCI_REG_ADDR((hw), (reg))))
+static uint32_t
+phys_legacy_read32(struct virtio_hw *hw, uint32_t *addr)
+{
+	return inl((unsigned short)(hw->io_base + (uint64_t)addr));
+}
 
-#define VIRTIO_READ_REG_2(hw, reg) \
-	inw((VIRTIO_PCI_REG_ADDR((hw), (reg))))
-#define VIRTIO_WRITE_REG_2(hw, reg, value) \
-	outw_p((unsigned short)(value), (VIRTIO_PCI_REG_ADDR((hw), (reg))))
+static void
+phys_legacy_write8(struct virtio_hw *hw, uint8_t *addr, uint8_t val)
+{
+	return outb_p((unsigned char)val,
+			(unsigned short)(hw->io_base + (uint64_t)addr));
+}
 
-#define VIRTIO_READ_REG_4(hw, reg) \
-	inl((VIRTIO_PCI_REG_ADDR((hw), (reg))))
-#define VIRTIO_WRITE_REG_4(hw, reg, value) \
-	outl_p((unsigned int)(value), (VIRTIO_PCI_REG_ADDR((hw), (reg))))
+static void
+phys_legacy_write16(struct virtio_hw *hw, uint16_t *addr, uint16_t val)
+{
+	return outb_p((unsigned short)val,
+			(unsigned short)(hw->io_base + (uint64_t)addr));
+}
+
+static void
+phys_legacy_write32(struct virtio_hw *hw, uint32_t *addr, uint32_t val)
+{
+	return outb_p((unsigned int)val,
+			(unsigned short)(hw->io_base + (uint64_t)addr));
+}
+
+static const struct virtio_pci_dev_ops phys_legacy_dev_ops = {
+	.read8		= phys_legacy_read8,
+	.read16		= phys_legacy_read16,
+	.read32		= phys_legacy_read32,
+	.write8		= phys_legacy_write8,
+	.write16	= phys_legacy_write16,
+	.write32	= phys_legacy_write32,
+};
+
+static uint8_t
+phys_modern_read8(struct virtio_hw *hw __rte_unused, uint8_t *addr)
+{
+	return *(volatile uint8_t *)addr;
+}
+
+static uint16_t
+phys_modern_read16(struct virtio_hw *hw __rte_unused, uint16_t *addr)
+{
+	return *(volatile uint16_t *)addr;
+}
+
+static uint32_t
+phys_modern_read32(struct virtio_hw *hw __rte_unused, uint32_t *addr)
+{
+	return *(volatile uint32_t *)addr;
+}
+
+static void
+phys_modern_write8(struct virtio_hw *hw __rte_unused,
+		uint8_t *addr, uint8_t val)
+{
+	*(volatile uint8_t *)addr = val;
+}
+
+static void
+phys_modern_write16(struct virtio_hw *hw __rte_unused,
+		uint16_t *addr, uint16_t val)
+{
+	*(volatile uint16_t *)addr = val;
+}
+
+static void
+phys_modern_write32(struct virtio_hw *hw __rte_unused,
+		uint32_t *addr, uint32_t val)
+{
+	*(volatile uint32_t *)addr = val;
+}
+
+static const struct virtio_pci_dev_ops phys_modern_dev_ops = {
+	.read8		= phys_modern_read8,
+	.read16		= phys_modern_read16,
+	.read32		= phys_modern_read32,
+	.write8		= phys_modern_write8,
+	.write16	= phys_modern_write16,
+	.write32	= phys_modern_write32,
+};
+
+static int
+vtpci_dev_init(struct rte_eth_dev *dev, struct virtio_hw *hw)
+{
+	if (dev->dev_type == RTE_ETH_DEV_PCI) {
+		if (hw->modern == 1)
+			hw->vtpci_dev_ops = &phys_modern_dev_ops;
+		else
+			hw->vtpci_dev_ops = &phys_legacy_dev_ops;
+		return 0;
+	}
+
+	PMD_DRV_LOG(ERR, "Unkown virtio-net device.");
+	return -1;
+}
+
+static void
+vtpci_dev_uninit(struct rte_eth_dev *dev __rte_unused, struct virtio_hw *hw)
+{
+	hw->vtpci_dev_ops = NULL;
+}
+
+static int
+phys_map_pci_cfg(struct virtio_hw *hw)
+{
+	return rte_eal_pci_map_device(hw->dev);
+}
+
+static void
+phys_unmap_pci_cfg(struct virtio_hw *hw)
+{
+	rte_eal_pci_unmap_device(hw->dev);
+}
+
+static int
+phys_read_pci_cfg(struct virtio_hw *hw, void *buf, size_t len, off_t offset)
+{
+	return rte_eal_pci_read_config(hw->dev, buf, len, offset);
+}
+
+static void *
+phys_get_mapped_addr(struct virtio_hw *hw, uint8_t bar,
+		     uint32_t offset, uint32_t length)
+{
+	uint8_t *base;
+
+	if (bar > 5) {
+		PMD_INIT_LOG(ERR, "invalid bar: %u", bar);
+		return NULL;
+	}
+
+	if (offset + length < offset) {
+		PMD_INIT_LOG(ERR, "offset(%u) + lenght(%u) overflows",
+			offset, length);
+		return NULL;
+	}
+
+	if (offset + length > hw->dev->mem_resource[bar].len) {
+		PMD_INIT_LOG(ERR,
+			"invalid cap: overflows bar space: %u > %"PRIu64,
+			offset + length, hw->dev->mem_resource[bar].len);
+		return NULL;
+	}
+
+	base = hw->dev->mem_resource[bar].addr;
+	if (base == NULL) {
+		PMD_INIT_LOG(ERR, "bar %u base addr is NULL", bar);
+		return NULL;
+	}
+
+	return base + offset;
+}
+
+static const struct virtio_pci_cfg_ops phys_cfg_ops = {
+	.map			= phys_map_pci_cfg,
+	.unmap			= phys_unmap_pci_cfg,
+	.get_mapped_addr	= phys_get_mapped_addr,
+	.read			= phys_read_pci_cfg,
+};
+
+static int
+vtpci_cfg_init(struct rte_eth_dev *dev, struct virtio_hw *hw)
+{
+	if (dev->dev_type == RTE_ETH_DEV_PCI) {
+		hw->vtpci_cfg_ops = &phys_cfg_ops;
+		return 0;
+	}
+
+	PMD_DRV_LOG(ERR, "Unkown virtio-net device.");
+	return -1;
+}
+
+static void
+vtpci_cfg_uninit(struct rte_eth_dev *dev __rte_unused, struct virtio_hw *hw)
+{
+	hw->vtpci_cfg_ops = NULL;
+}
 
 static void
 legacy_read_dev_config(struct virtio_hw *hw, size_t offset,
@@ -80,13 +254,16 @@ legacy_read_dev_config(struct virtio_hw *hw, size_t offset,
 	for (d = dst; length > 0; d += size, off += size, length -= size) {
 		if (length >= 4) {
 			size = 4;
-			*(uint32_t *)d = VIRTIO_READ_REG_4(hw, off);
+			*(uint32_t *)d = hw->vtpci_dev_ops->read32(
+						hw, (uint32_t *)off);
 		} else if (length >= 2) {
 			size = 2;
-			*(uint16_t *)d = VIRTIO_READ_REG_2(hw, off);
+			*(uint16_t *)d = hw->vtpci_dev_ops->read16(
+						hw, (uint16_t *)off);
 		} else {
 			size = 1;
-			*d = VIRTIO_READ_REG_1(hw, off);
+			*d = hw->vtpci_dev_ops->read8(
+						hw, (uint8_t *)off);
 		}
 	}
 }
@@ -103,13 +280,15 @@ legacy_write_dev_config(struct virtio_hw *hw, size_t offset,
 	for (s = src; length > 0; s += size, off += size, length -= size) {
 		if (length >= 4) {
 			size = 4;
-			VIRTIO_WRITE_REG_4(hw, off, *(const uint32_t *)s);
+			hw->vtpci_dev_ops->write32(hw,
+					(uint32_t *)off, *(const uint32_t *)s);
 		} else if (length >= 2) {
 			size = 2;
-			VIRTIO_WRITE_REG_2(hw, off, *(const uint16_t *)s);
+			hw->vtpci_dev_ops->write16(hw,
+					(uint16_t *)off, *(const uint16_t *)s);
 		} else {
 			size = 1;
-			VIRTIO_WRITE_REG_1(hw, off, *s);
+			hw->vtpci_dev_ops->write8(hw, (uint8_t *)off, *s);
 		}
 	}
 }
@@ -117,7 +296,8 @@ legacy_write_dev_config(struct virtio_hw *hw, size_t offset,
 static uint64_t
 legacy_get_features(struct virtio_hw *hw)
 {
-	return VIRTIO_READ_REG_4(hw, VIRTIO_PCI_HOST_FEATURES);
+	return hw->vtpci_dev_ops->read32(hw,
+			(uint32_t *)VIRTIO_PCI_HOST_FEATURES);
 }
 
 static void
@@ -128,19 +308,20 @@ legacy_set_features(struct virtio_hw *hw, uint64_t features)
 			"only 32 bit features are allowed for legacy virtio!");
 		return;
 	}
-	VIRTIO_WRITE_REG_4(hw, VIRTIO_PCI_GUEST_FEATURES, features);
+	hw->vtpci_dev_ops->write32(hw,
+			(uint32_t *)VIRTIO_PCI_GUEST_FEATURES, features);
 }
 
 static uint8_t
 legacy_get_status(struct virtio_hw *hw)
 {
-	return VIRTIO_READ_REG_1(hw, VIRTIO_PCI_STATUS);
+	return hw->vtpci_dev_ops->read8(hw, (uint8_t *)VIRTIO_PCI_STATUS);
 }
 
 static void
 legacy_set_status(struct virtio_hw *hw, uint8_t status)
 {
-	VIRTIO_WRITE_REG_1(hw, VIRTIO_PCI_STATUS, status);
+	hw->vtpci_dev_ops->write8(hw, (uint8_t *)VIRTIO_PCI_STATUS, status);
 }
 
 static void
@@ -152,45 +333,55 @@ legacy_reset(struct virtio_hw *hw)
 static uint8_t
 legacy_get_isr(struct virtio_hw *hw)
 {
-	return VIRTIO_READ_REG_1(hw, VIRTIO_PCI_ISR);
+	return hw->vtpci_dev_ops->read8(hw, (uint8_t *)VIRTIO_PCI_ISR);
 }
 
 /* Enable one vector (0) for Link State Intrerrupt */
 static uint16_t
 legacy_set_config_irq(struct virtio_hw *hw, uint16_t vec)
 {
-	VIRTIO_WRITE_REG_2(hw, VIRTIO_MSI_CONFIG_VECTOR, vec);
-	return VIRTIO_READ_REG_2(hw, VIRTIO_MSI_CONFIG_VECTOR);
+	hw->vtpci_dev_ops->write16(hw,
+			(uint16_t *)VIRTIO_MSI_CONFIG_VECTOR, vec);
+	return hw->vtpci_dev_ops->read16(hw,
+			(uint16_t *)VIRTIO_MSI_CONFIG_VECTOR);
 }
 
 static uint16_t
 legacy_get_queue_num(struct virtio_hw *hw, uint16_t queue_id)
 {
-	VIRTIO_WRITE_REG_2(hw, VIRTIO_PCI_QUEUE_SEL, queue_id);
-	return VIRTIO_READ_REG_2(hw, VIRTIO_PCI_QUEUE_NUM);
+	hw->vtpci_dev_ops->write16(hw,
+			(uint16_t *)VIRTIO_PCI_QUEUE_SEL, queue_id);
+	return hw->vtpci_dev_ops->read16(hw,
+			(uint16_t *)VIRTIO_PCI_QUEUE_NUM);
 }
 
 static void
 legacy_setup_queue(struct virtio_hw *hw, struct virtqueue *vq)
 {
-	VIRTIO_WRITE_REG_2(hw, VIRTIO_PCI_QUEUE_SEL, vq->vq_queue_index);
+	hw->vtpci_dev_ops->write16(hw,
+			(uint16_t *)VIRTIO_PCI_QUEUE_SEL, vq->vq_queue_index);
 
-	VIRTIO_WRITE_REG_4(hw, VIRTIO_PCI_QUEUE_PFN,
-		vq->mz->phys_addr >> VIRTIO_PCI_QUEUE_ADDR_SHIFT);
+	hw->vtpci_dev_ops->write32(hw,
+			(uint32_t *)VIRTIO_PCI_QUEUE_PFN,
+			vq->vq_ring_mem >> VIRTIO_PCI_QUEUE_ADDR_SHIFT);
 }
 
 static void
 legacy_del_queue(struct virtio_hw *hw, struct virtqueue *vq)
 {
-	VIRTIO_WRITE_REG_2(hw, VIRTIO_PCI_QUEUE_SEL, vq->vq_queue_index);
+	hw->vtpci_dev_ops->write16(hw,
+			(uint16_t *)VIRTIO_PCI_QUEUE_SEL, vq->vq_queue_index);
 
-	VIRTIO_WRITE_REG_4(hw, VIRTIO_PCI_QUEUE_PFN, 0);
+	hw->vtpci_dev_ops->write32(hw,
+			(uint32_t *)VIRTIO_PCI_QUEUE_PFN, 0);
 }
 
 static void
 legacy_notify_queue(struct virtio_hw *hw, struct virtqueue *vq)
 {
-	VIRTIO_WRITE_REG_2(hw, VIRTIO_PCI_QUEUE_NOTIFY, vq->vq_queue_index);
+	hw->vtpci_dev_ops->write16(hw,
+			(uint16_t *)VIRTIO_PCI_QUEUE_NOTIFY,
+			vq->vq_queue_index);
 }
 
 #ifdef RTE_EXEC_ENV_LINUXAPP
@@ -468,35 +659,12 @@ static const struct virtio_pci_ops legacy_ops = {
 	.notify_queue	= legacy_notify_queue,
 };
 
-
-#define IO_READ_DEF(nr_bits, type)		\
-static inline type				\
-io_read##nr_bits(type *addr)			\
-{						\
-	return *(volatile type *)addr;		\
-}
-
-#define IO_WRITE_DEF(nr_bits, type)		\
-static inline void				\
-io_write##nr_bits(type *addr, type val)		\
-{						\
-	*(volatile type *)addr = val;		\
-}
-
-IO_READ_DEF (8, uint8_t)
-IO_WRITE_DEF(8, uint8_t)
-
-IO_READ_DEF (16, uint16_t)
-IO_WRITE_DEF(16, uint16_t)
-
-IO_READ_DEF (32, uint32_t)
-IO_WRITE_DEF(32, uint32_t)
-
 static inline void
-io_write64_twopart(uint32_t *lo, uint32_t *hi, uint64_t val)
+io_write64_twopart(struct virtio_hw *hw,
+		   uint32_t *lo, uint32_t *hi, uint64_t val)
 {
-	io_write32(lo, val & ((1ULL << 32) - 1));
-	io_write32(hi, val >> 32);
+	hw->vtpci_dev_ops->write32(hw, lo, val & ((1ULL << 32) - 1));
+	hw->vtpci_dev_ops->write32(hw, hi, val >> 32);
 }
 
 static void
@@ -508,13 +676,16 @@ modern_read_dev_config(struct virtio_hw *hw, size_t offset,
 	uint8_t old_gen, new_gen;
 
 	do {
-		old_gen = io_read8(&hw->common_cfg->config_generation);
+		old_gen = hw->vtpci_dev_ops->read8(hw,
+				&hw->common_cfg->config_generation);
 
 		p = dst;
 		for (i = 0;  i < length; i++)
-			*p++ = io_read8((uint8_t *)hw->dev_cfg + offset + i);
+			*p++ = hw->vtpci_dev_ops->read8(hw,
+					(uint8_t *)hw->dev_cfg + offset + i);
 
-		new_gen = io_read8(&hw->common_cfg->config_generation);
+		new_gen = hw->vtpci_dev_ops->read8(hw,
+				&hw->common_cfg->config_generation);
 	} while (old_gen != new_gen);
 }
 
@@ -526,7 +697,8 @@ modern_write_dev_config(struct virtio_hw *hw, size_t offset,
 	const uint8_t *p = src;
 
 	for (i = 0;  i < length; i++)
-		io_write8((uint8_t *)hw->dev_cfg + offset + i, *p++);
+		hw->vtpci_dev_ops->write8(hw,
+				(uint8_t *)hw->dev_cfg + offset + i, *p++);
 }
 
 static uint64_t
@@ -534,11 +706,15 @@ modern_get_features(struct virtio_hw *hw)
 {
 	uint32_t features_lo, features_hi;
 
-	io_write32(&hw->common_cfg->device_feature_select, 0);
-	features_lo = io_read32(&hw->common_cfg->device_feature);
+	hw->vtpci_dev_ops->write32(hw,
+			&hw->common_cfg->device_feature_select, 0);
+	features_lo = hw->vtpci_dev_ops->read32(hw,
+			&hw->common_cfg->device_feature);
 
-	io_write32(&hw->common_cfg->device_feature_select, 1);
-	features_hi = io_read32(&hw->common_cfg->device_feature);
+	hw->vtpci_dev_ops->write32(hw,
+			&hw->common_cfg->device_feature_select, 1);
+	features_hi = hw->vtpci_dev_ops->read32(hw,
+			&hw->common_cfg->device_feature);
 
 	return ((uint64_t)features_hi << 32) | features_lo;
 }
@@ -546,25 +722,30 @@ modern_get_features(struct virtio_hw *hw)
 static void
 modern_set_features(struct virtio_hw *hw, uint64_t features)
 {
-	io_write32(&hw->common_cfg->guest_feature_select, 0);
-	io_write32(&hw->common_cfg->guest_feature,
-		   features & ((1ULL << 32) - 1));
+	hw->vtpci_dev_ops->write32(hw,
+			&hw->common_cfg->guest_feature_select, 0);
+	hw->vtpci_dev_ops->write32(hw,
+			&hw->common_cfg->guest_feature,
+			features & ((1ULL << 32) - 1));
 
-	io_write32(&hw->common_cfg->guest_feature_select, 1);
-	io_write32(&hw->common_cfg->guest_feature,
-		   features >> 32);
+	hw->vtpci_dev_ops->write32(hw,
+			&hw->common_cfg->guest_feature_select, 1);
+	hw->vtpci_dev_ops->write32(hw,
+			&hw->common_cfg->guest_feature, features >> 32);
 }
 
 static uint8_t
 modern_get_status(struct virtio_hw *hw)
 {
-	return io_read8(&hw->common_cfg->device_status);
+	return hw->vtpci_dev_ops->read8(hw,
+			&hw->common_cfg->device_status);
 }
 
 static void
 modern_set_status(struct virtio_hw *hw, uint8_t status)
 {
-	io_write8(&hw->common_cfg->device_status, status);
+	hw->vtpci_dev_ops->write8(hw,
+			&hw->common_cfg->device_status, status);
 }
 
 static void
@@ -577,21 +758,25 @@ modern_reset(struct virtio_hw *hw)
 static uint8_t
 modern_get_isr(struct virtio_hw *hw)
 {
-	return io_read8(hw->isr);
+	return hw->vtpci_dev_ops->read8(hw, hw->isr);
 }
 
 static uint16_t
 modern_set_config_irq(struct virtio_hw *hw, uint16_t vec)
 {
-	io_write16(&hw->common_cfg->msix_config, vec);
-	return io_read16(&hw->common_cfg->msix_config);
+	hw->vtpci_dev_ops->write16(hw,
+			&hw->common_cfg->msix_config, vec);
+	return hw->vtpci_dev_ops->read16(hw,
+			&hw->common_cfg->msix_config);
 }
 
 static uint16_t
 modern_get_queue_num(struct virtio_hw *hw, uint16_t queue_id)
 {
-	io_write16(&hw->common_cfg->queue_select, queue_id);
-	return io_read16(&hw->common_cfg->queue_size);
+	hw->vtpci_dev_ops->write16(hw,
+			&hw->common_cfg->queue_select, queue_id);
+	return hw->vtpci_dev_ops->read16(hw,
+			&hw->common_cfg->queue_size);
 }
 
 static void
@@ -606,20 +791,23 @@ modern_setup_queue(struct virtio_hw *hw, struct virtqueue *vq)
 							 ring[vq->vq_nentries]),
 				   VIRTIO_PCI_VRING_ALIGN);
 
-	io_write16(&hw->common_cfg->queue_select, vq->vq_queue_index);
+	hw->vtpci_dev_ops->write16(hw,
+			&hw->common_cfg->queue_select, vq->vq_queue_index);
 
-	io_write64_twopart(&hw->common_cfg->queue_desc_lo,
+	io_write64_twopart(hw, &hw->common_cfg->queue_desc_lo,
 			   &hw->common_cfg->queue_desc_hi, desc_addr);
-	io_write64_twopart(&hw->common_cfg->queue_avail_lo,
+	io_write64_twopart(hw, &hw->common_cfg->queue_avail_lo,
 			   &hw->common_cfg->queue_avail_hi, avail_addr);
-	io_write64_twopart(&hw->common_cfg->queue_used_lo,
+	io_write64_twopart(hw, &hw->common_cfg->queue_used_lo,
 			   &hw->common_cfg->queue_used_hi, used_addr);
 
-	notify_off = io_read16(&hw->common_cfg->queue_notify_off);
+	notify_off = hw->vtpci_dev_ops->read16(hw,
+				&hw->common_cfg->queue_notify_off);
 	vq->notify_addr = (void *)((uint8_t *)hw->notify_base +
 				notify_off * hw->notify_off_multiplier);
 
-	io_write16(&hw->common_cfg->queue_enable, 1);
+	hw->vtpci_dev_ops->write16(hw,
+			&hw->common_cfg->queue_enable, 1);
 
 	PMD_INIT_LOG(DEBUG, "queue %u addresses:", vq->vq_queue_index);
 	PMD_INIT_LOG(DEBUG, "\t desc_addr: %"PRIx64, desc_addr);
@@ -632,22 +820,24 @@ modern_setup_queue(struct virtio_hw *hw, struct virtqueue *vq)
 static void
 modern_del_queue(struct virtio_hw *hw, struct virtqueue *vq)
 {
-	io_write16(&hw->common_cfg->queue_select, vq->vq_queue_index);
+	hw->vtpci_dev_ops->write16(hw,
+			&hw->common_cfg->queue_select, vq->vq_queue_index);
 
-	io_write64_twopart(&hw->common_cfg->queue_desc_lo,
+	io_write64_twopart(hw, &hw->common_cfg->queue_desc_lo,
 			   &hw->common_cfg->queue_desc_hi, 0);
-	io_write64_twopart(&hw->common_cfg->queue_avail_lo,
+	io_write64_twopart(hw, &hw->common_cfg->queue_avail_lo,
 			   &hw->common_cfg->queue_avail_hi, 0);
-	io_write64_twopart(&hw->common_cfg->queue_used_lo,
+	io_write64_twopart(hw, &hw->common_cfg->queue_used_lo,
 			   &hw->common_cfg->queue_used_hi, 0);
 
-	io_write16(&hw->common_cfg->queue_enable, 0);
+	hw->vtpci_dev_ops->write16(hw,
+			&hw->common_cfg->queue_enable, 0);
 }
 
 static void
 modern_notify_queue(struct virtio_hw *hw __rte_unused, struct virtqueue *vq)
 {
-	io_write16(vq->notify_addr, 1);
+	hw->vtpci_dev_ops->write16(hw, vq->notify_addr, 1);
 }
 
 static const struct virtio_pci_ops modern_ops = {
@@ -666,7 +856,6 @@ static const struct virtio_pci_ops modern_ops = {
 	.notify_queue	= modern_notify_queue,
 };
 
-
 void
 vtpci_read_dev_config(struct virtio_hw *hw, size_t offset,
 		      void *dst, int length)
@@ -739,61 +928,26 @@ vtpci_irq_config(struct virtio_hw *hw, uint16_t vec)
 	return hw->vtpci_ops->set_config_irq(hw, vec);
 }
 
-static void *
-get_cfg_addr(struct rte_pci_device *dev, struct virtio_pci_cap *cap)
-{
-	uint8_t  bar    = cap->bar;
-	uint32_t length = cap->length;
-	uint32_t offset = cap->offset;
-	uint8_t *base;
-
-	if (bar > 5) {
-		PMD_INIT_LOG(ERR, "invalid bar: %u", bar);
-		return NULL;
-	}
-
-	if (offset + length < offset) {
-		PMD_INIT_LOG(ERR, "offset(%u) + lenght(%u) overflows",
-			offset, length);
-		return NULL;
-	}
-
-	if (offset + length > dev->mem_resource[bar].len) {
-		PMD_INIT_LOG(ERR,
-			"invalid cap: overflows bar space: %u > %"PRIu64,
-			offset + length, dev->mem_resource[bar].len);
-		return NULL;
-	}
-
-	base = dev->mem_resource[bar].addr;
-	if (base == NULL) {
-		PMD_INIT_LOG(ERR, "bar %u base addr is NULL", bar);
-		return NULL;
-	}
-
-	return base + offset;
-}
-
 static int
-virtio_read_caps(struct rte_pci_device *dev, struct virtio_hw *hw)
+virtio_read_caps(struct virtio_hw *hw)
 {
 	uint8_t pos;
 	struct virtio_pci_cap cap;
 	int ret;
 
-	if (rte_eal_pci_map_device(dev) < 0) {
+	if (hw->vtpci_cfg_ops->map(hw) < 0) {
 		PMD_INIT_LOG(DEBUG, "failed to map pci device!");
 		return -1;
 	}
 
-	ret = rte_eal_pci_read_config(dev, &pos, 1, PCI_CAPABILITY_LIST);
+	ret = hw->vtpci_cfg_ops->read(hw, &pos, 1, PCI_CAPABILITY_LIST);
 	if (ret < 0) {
 		PMD_INIT_LOG(DEBUG, "failed to read pci capability list");
 		return -1;
 	}
 
 	while (pos) {
-		ret = rte_eal_pci_read_config(dev, &cap, sizeof(cap), pos);
+		ret = hw->vtpci_cfg_ops->read(hw, &cap, sizeof(cap), pos);
 		if (ret < 0) {
 			PMD_INIT_LOG(ERR,
 				"failed to read pci cap at pos: %x", pos);
@@ -813,18 +967,25 @@ virtio_read_caps(struct rte_pci_device *dev, struct virtio_hw *hw)
 
 		switch (cap.cfg_type) {
 		case VIRTIO_PCI_CAP_COMMON_CFG:
-			hw->common_cfg = get_cfg_addr(dev, &cap);
+			hw->common_cfg =
+				hw->vtpci_cfg_ops->get_mapped_addr(
+					hw, cap.bar, cap.offset, cap.length);
 			break;
 		case VIRTIO_PCI_CAP_NOTIFY_CFG:
-			rte_eal_pci_read_config(dev, &hw->notify_off_multiplier,
+			hw->vtpci_cfg_ops->read(hw, &hw->notify_off_multiplier,
 						4, pos + sizeof(cap));
-			hw->notify_base = get_cfg_addr(dev, &cap);
+			hw->notify_base =
+				hw->vtpci_cfg_ops->get_mapped_addr(
+					hw, cap.bar, cap.offset, cap.length);
 			break;
 		case VIRTIO_PCI_CAP_DEVICE_CFG:
-			hw->dev_cfg = get_cfg_addr(dev, &cap);
+			hw->dev_cfg =
+				hw->vtpci_cfg_ops->get_mapped_addr(
+					hw, cap.bar, cap.offset, cap.length);
 			break;
 		case VIRTIO_PCI_CAP_ISR_CFG:
-			hw->isr = get_cfg_addr(dev, &cap);
+			hw->isr = hw->vtpci_cfg_ops->get_mapped_addr(
+					hw, cap.bar, cap.offset, cap.length);
 			break;
 		}
 
@@ -849,43 +1010,87 @@ virtio_read_caps(struct rte_pci_device *dev, struct virtio_hw *hw)
 	return 0;
 }
 
+static int
+vtpci_modern_init(struct rte_eth_dev *dev, struct virtio_hw *hw)
+{
+	struct rte_pci_device *pci_dev = dev->pci_dev;
+
+	PMD_INIT_LOG(INFO, "modern virtio pci detected.");
+
+	if (dev->dev_type == RTE_ETH_DEV_PCI)
+		pci_dev->driver->drv_flags |= RTE_PCI_DRV_INTR_LSC;
+
+	hw->vtpci_ops = &modern_ops;
+	hw->modern = 1;
+
+	return 0;
+}
+
+static int
+vtpci_legacy_init(struct rte_eth_dev *dev, struct virtio_hw *hw)
+{
+	struct rte_pci_device *pci_dev = dev->pci_dev;
+
+	PMD_INIT_LOG(INFO, "trying with legacy virtio pci.");
+	if (dev->dev_type == RTE_ETH_DEV_PCI) {
+		if (legacy_virtio_resource_init(pci_dev) < 0)
+			return -1;
+
+		hw->use_msix = legacy_virtio_has_msix(&pci_dev->addr);
+	}
+
+	hw->io_base = (uint32_t)(uintptr_t)
+		hw->vtpci_cfg_ops->get_mapped_addr(hw, 0, 0, 0);
+	hw->vtpci_ops = &legacy_ops;
+	hw->modern = 0;
+
+	return 0;
+}
+
 int
-vtpci_init(struct rte_pci_device *dev, struct virtio_hw *hw)
+vtpci_init(struct rte_eth_dev *eth_dev, struct virtio_hw *hw)
 {
-	hw->dev = dev;
+	struct rte_pci_device *pci_dev = eth_dev->pci_dev;
+	int ret;
+
+	hw->dev = pci_dev;
+
+	if ((eth_dev->dev_type == RTE_ETH_DEV_PCI) && (pci_dev == NULL)) {
+		PMD_INIT_LOG(INFO, "No pci device specified.");
+		return -1;
+	}
+
+	if (vtpci_cfg_init(eth_dev, hw) < 0)
+		return -1;
 
 	/*
 	 * Try if we can succeed reading virtio pci caps, which exists
 	 * only on modern pci device. If failed, we fallback to legacy
 	 * virtio handling.
 	 */
-	if (virtio_read_caps(dev, hw) == 0) {
-		PMD_INIT_LOG(INFO, "modern virtio pci detected.");
-		hw->vtpci_ops = &modern_ops;
-		hw->modern    = 1;
-		dev->driver->drv_flags |= RTE_PCI_DRV_INTR_LSC;
-		return 0;
-	}
+	if (virtio_read_caps(hw) == 0)
+		ret = vtpci_modern_init(eth_dev, hw);
+	else
+		ret = vtpci_legacy_init(eth_dev, hw);
 
-	PMD_INIT_LOG(INFO, "trying with legacy virtio pci.");
-	if (legacy_virtio_resource_init(dev) < 0)
+	if (ret < 0)
 		return -1;
 
-	hw->vtpci_ops = &legacy_ops;
-	hw->use_msix = legacy_virtio_has_msix(&dev->addr);
-	hw->io_base  = (uint32_t)(uintptr_t)dev->mem_resource[0].addr;
-	hw->modern   = 0;
+	if (vtpci_dev_init(eth_dev, hw) < 0)
+		return -1;
 
 	return 0;
 }
 
 void
-vtpci_uninit(struct rte_pci_device *dev, struct virtio_hw *hw)
+vtpci_uninit(struct rte_eth_dev *eth_dev, struct virtio_hw *hw)
 {
 	hw->dev  = NULL;
 	hw->vtpci_ops = NULL;
 	hw->use_msix = 0;
 	hw->io_base  = 0;
 	hw->modern   = 0;
-	rte_eal_pci_unmap_device(dev);
+	hw->vtpci_cfg_ops->unmap(hw);
+	vtpci_dev_uninit(eth_dev, hw);
+	vtpci_cfg_uninit(eth_dev, hw);
 }
diff --git a/drivers/net/virtio/virtio_pci.h b/drivers/net/virtio/virtio_pci.h
index 17c7972..7b5ad54 100644
--- a/drivers/net/virtio/virtio_pci.h
+++ b/drivers/net/virtio/virtio_pci.h
@@ -222,6 +222,24 @@ struct virtio_pci_common_cfg {
 
 struct virtio_hw;
 
+/* Functions to access pci configuration space */
+struct virtio_pci_cfg_ops {
+	int (*map)(struct virtio_hw *hw);
+	void (*unmap)(struct virtio_hw *hw);
+	void *(*get_mapped_addr)(struct virtio_hw *hw, uint8_t bar, uint32_t offset, uint32_t length);
+	int (*read)(struct virtio_hw *hw, void *buf, size_t len, off_t offset);
+};
+
+/* Functions to access pci device registers */
+struct virtio_pci_dev_ops {
+	uint8_t (*read8)(struct virtio_hw *hw, uint8_t *addr);
+	uint16_t (*read16)(struct virtio_hw *hw, uint16_t *addr);
+	uint32_t (*read32)(struct virtio_hw *hw, uint32_t *addr);
+	void (*write8)(struct virtio_hw *hw, uint8_t *addr, uint8_t val);
+	void (*write16)(struct virtio_hw *hw, uint16_t *addr, uint16_t val);
+	void (*write32)(struct virtio_hw *hw, uint32_t *addr, uint32_t val);
+};
+
 struct virtio_pci_ops {
 	void (*read_dev_cfg)(struct virtio_hw *hw, size_t offset,
 			     void *dst, int len);
@@ -266,6 +284,8 @@ struct virtio_hw {
 	struct virtio_pci_common_cfg *common_cfg;
 	struct virtio_net_config *dev_cfg;
 	const struct virtio_pci_ops *vtpci_ops;
+	const struct virtio_pci_cfg_ops *vtpci_cfg_ops;
+	const struct virtio_pci_dev_ops *vtpci_dev_ops;
 };
 
 /*
@@ -327,8 +347,8 @@ vtpci_with_feature(struct virtio_hw *hw, uint64_t bit)
 /*
  * Function declaration from virtio_pci.c
  */
-int vtpci_init(struct rte_pci_device *, struct virtio_hw *);
-void vtpci_uninit(struct rte_pci_device *dev, struct virtio_hw *);
+int vtpci_init(struct rte_eth_dev *, struct virtio_hw *);
+void vtpci_uninit(struct rte_eth_dev *, struct virtio_hw *);
 void vtpci_reset(struct virtio_hw *);
 
 void vtpci_reinit_complete(struct virtio_hw *);
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 120+ messages in thread

* [RFC PATCH 4/5] EAL: Add new EAL "--shm" option.
  2016-01-18  9:13 ` [PATCH 1/3] virtio: Change the parameter order of io_write8/16/32() Tetsuya Mukawa
                     ` (3 preceding siblings ...)
  2016-01-21 11:07   ` [RFC PATCH 3/5] virtio: Add a new layer to abstract pci access method Tetsuya Mukawa
@ 2016-01-21 11:07   ` Tetsuya Mukawa
  2016-01-22  1:43     ` Tan, Jianfeng
  2016-01-21 11:07   ` [RFC PATCH 5/5] virtio: Extend virtio-net PMD to support container environment Tetsuya Mukawa
                     ` (4 subsequent siblings)
  9 siblings, 1 reply; 120+ messages in thread
From: Tetsuya Mukawa @ 2016-01-21 11:07 UTC (permalink / raw)
  To: dev, yuanhan.liu, jianfeng.tan

This is a temporary patch to get EAL memory under 16T(1 << 44).

The patch adds new EAL "--shm" option. If the option is specified,
EAL will allocate one file from hugetlbfs. This memory is for sharing
memory between DPDK applicaiton and QEMU ivhsmem device.

Signed-off-by: Tetsuya Mukawa <mukawa@igel.co.jp>
---
 lib/librte_eal/common/eal_common_options.c |  5 ++
 lib/librte_eal/common/eal_internal_cfg.h   |  1 +
 lib/librte_eal/common/eal_options.h        |  2 +
 lib/librte_eal/common/include/rte_memory.h |  5 ++
 lib/librte_eal/linuxapp/eal/eal_memory.c   | 76 ++++++++++++++++++++++++++++++
 5 files changed, 89 insertions(+)

diff --git a/lib/librte_eal/common/eal_common_options.c b/lib/librte_eal/common/eal_common_options.c
index 29942ea..a752bf3 100644
--- a/lib/librte_eal/common/eal_common_options.c
+++ b/lib/librte_eal/common/eal_common_options.c
@@ -86,6 +86,7 @@ eal_long_options[] = {
 	{OPT_NO_HUGE,           0, NULL, OPT_NO_HUGE_NUM          },
 	{OPT_NO_PCI,            0, NULL, OPT_NO_PCI_NUM           },
 	{OPT_NO_SHCONF,         0, NULL, OPT_NO_SHCONF_NUM        },
+	{OPT_SHM,               0, NULL, OPT_SHM_NUM              },
 	{OPT_PCI_BLACKLIST,     1, NULL, OPT_PCI_BLACKLIST_NUM    },
 	{OPT_PCI_WHITELIST,     1, NULL, OPT_PCI_WHITELIST_NUM    },
 	{OPT_PROC_TYPE,         1, NULL, OPT_PROC_TYPE_NUM        },
@@ -834,6 +835,10 @@ eal_parse_common_option(int opt, const char *optarg,
 		conf->no_hugetlbfs = 1;
 		break;
 
+	case OPT_SHM_NUM:
+		conf->shm = 1;
+		break;
+
 	case OPT_NO_PCI_NUM:
 		conf->no_pci = 1;
 		break;
diff --git a/lib/librte_eal/common/eal_internal_cfg.h b/lib/librte_eal/common/eal_internal_cfg.h
index 5f1367e..362ce12 100644
--- a/lib/librte_eal/common/eal_internal_cfg.h
+++ b/lib/librte_eal/common/eal_internal_cfg.h
@@ -66,6 +66,7 @@ struct internal_config {
 	volatile unsigned no_hugetlbfs;   /**< true to disable hugetlbfs */
 	unsigned hugepage_unlink;         /**< true to unlink backing files */
 	volatile unsigned xen_dom0_support; /**< support app running on Xen Dom0*/
+	volatile unsigned shm;            /**< true to create shared memory for ivshmem */
 	volatile unsigned no_pci;         /**< true to disable PCI */
 	volatile unsigned no_hpet;        /**< true to disable HPET */
 	volatile unsigned vmware_tsc_map; /**< true to use VMware TSC mapping
diff --git a/lib/librte_eal/common/eal_options.h b/lib/librte_eal/common/eal_options.h
index a881c62..c1e586a 100644
--- a/lib/librte_eal/common/eal_options.h
+++ b/lib/librte_eal/common/eal_options.h
@@ -55,6 +55,8 @@ enum {
 	OPT_HUGE_DIR_NUM,
 #define OPT_HUGE_UNLINK       "huge-unlink"
 	OPT_HUGE_UNLINK_NUM,
+#define OPT_SHM               "shm"
+	OPT_SHM_NUM,
 #define OPT_LCORES            "lcores"
 	OPT_LCORES_NUM,
 #define OPT_LOG_LEVEL         "log-level"
diff --git a/lib/librte_eal/common/include/rte_memory.h b/lib/librte_eal/common/include/rte_memory.h
index 9c9e40f..3ad155b 100644
--- a/lib/librte_eal/common/include/rte_memory.h
+++ b/lib/librte_eal/common/include/rte_memory.h
@@ -102,6 +102,7 @@ struct rte_memseg {
 	int32_t socket_id;          /**< NUMA socket ID. */
 	uint32_t nchannel;          /**< Number of channels. */
 	uint32_t nrank;             /**< Number of ranks. */
+	int fd;                     /**< fd used for share this memory */
 #ifdef RTE_LIBRTE_XEN_DOM0
 	 /**< store segment MFNs */
 	uint64_t mfn[DOM0_NUM_MEMBLOCK];
@@ -130,6 +131,10 @@ int rte_mem_lock_page(const void *virt);
  */
 phys_addr_t rte_mem_virt2phy(const void *virt);
 
+
+int
+rte_memseg_info_get(int index, int *pfd, uint64_t *psize, void **paddr);
+
 /**
  * Get the layout of the available physical memory.
  *
diff --git a/lib/librte_eal/linuxapp/eal/eal_memory.c b/lib/librte_eal/linuxapp/eal/eal_memory.c
index 846fd31..7122f16 100644
--- a/lib/librte_eal/linuxapp/eal/eal_memory.c
+++ b/lib/librte_eal/linuxapp/eal/eal_memory.c
@@ -150,6 +150,21 @@ rte_mem_lock_page(const void *virt)
 	return mlock((void*)aligned, page_size);
 }
 
+int
+rte_memseg_info_get(int index, int *pfd, uint64_t *psize, void **paddr)
+{
+	struct rte_mem_config *mcfg;
+	mcfg = rte_eal_get_configuration()->mem_config;
+
+	if (pfd != NULL)
+		*pfd = mcfg->memseg[index].fd;
+	if (psize != NULL)
+		*psize = (uint64_t)mcfg->memseg[index].len;
+	if (paddr != NULL)
+		*paddr = (void *)(uint64_t)mcfg->memseg[index].addr;
+	return 0;
+}
+
 /*
  * Get physical address of any mapped virtual address in the current process.
  */
@@ -1075,6 +1090,46 @@ calc_num_pages_per_socket(uint64_t * memory,
 	return total_num_pages;
 }
 
+static void *
+rte_eal_shm_create(int *pfd, const char *hugedir)
+{
+	int ret, fd;
+	char filepath[256];
+	void *vaddr;
+	uint64_t size = internal_config.memory;
+
+	sprintf(filepath, "%s/%s_cvio", hugedir,
+			internal_config.hugefile_prefix);
+
+	fd = open(filepath, O_CREAT | O_RDWR, 0600);
+	if (fd < 0)
+		rte_panic("open %s failed: %s\n", filepath, strerror(errno));
+
+	ret = flock(fd, LOCK_EX);
+	if (ret < 0) {
+		close(fd);
+		rte_panic("flock %s failed: %s\n", filepath, strerror(errno));
+	}
+
+	ret = ftruncate(fd, size);
+	if (ret < 0)
+		rte_panic("ftruncate failed: %s\n", strerror(errno));
+
+	/*
+	 * Here, we need to map under (1 << 44).
+	 * This is temporary implementation.
+	 */
+	vaddr = mmap((void *)(1ULL << 43), size, PROT_READ | PROT_WRITE,
+			MAP_SHARED | MAP_FIXED, fd, 0);
+	if (vaddr != MAP_FAILED) {
+		memset(vaddr, 0, size);
+		*pfd = fd;
+	}
+	memset(vaddr, 0, size);
+
+	return vaddr;
+}
+
 /*
  * Prepare physical memory mapping: fill configuration structure with
  * these infos, return 0 on success.
@@ -1127,6 +1182,27 @@ rte_eal_hugepage_init(void)
 		return 0;
 	}
 
+	/* create shared memory consist of only one file */
+	if (internal_config.shm) {
+		int fd;
+		struct hugepage_info *hpi;
+
+		hpi = &internal_config.hugepage_info[0];
+		addr = rte_eal_shm_create(&fd, hpi->hugedir);
+		if (addr == MAP_FAILED) {
+			RTE_LOG(ERR, EAL, "%s: mmap() failed: %s\n", __func__,
+					strerror(errno));
+			return -1;
+		}
+		mcfg->memseg[0].phys_addr = rte_mem_virt2phy(addr);
+		mcfg->memseg[0].addr = addr;
+		mcfg->memseg[0].hugepage_sz = hpi->hugepage_sz;
+		mcfg->memseg[0].len = internal_config.memory;
+		mcfg->memseg[0].socket_id = 0;
+		mcfg->memseg[0].fd = fd;
+		return 0;
+	}
+
 /* check if app runs on Xen Dom0 */
 	if (internal_config.xen_dom0_support) {
 #ifdef RTE_LIBRTE_XEN_DOM0
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 120+ messages in thread

* [RFC PATCH 5/5] virtio: Extend virtio-net PMD to support container environment
  2016-01-18  9:13 ` [PATCH 1/3] virtio: Change the parameter order of io_write8/16/32() Tetsuya Mukawa
                     ` (4 preceding siblings ...)
  2016-01-21 11:07   ` [RFC PATCH 4/5] EAL: Add new EAL "--shm" option Tetsuya Mukawa
@ 2016-01-21 11:07   ` Tetsuya Mukawa
  2016-01-22  8:14     ` Xie, Huawei
                       ` (12 more replies)
  2016-01-28  9:33   ` [PATCH v2 0/3] virtio: Add a new layer to abstract pci access method Tetsuya Mukawa
                     ` (3 subsequent siblings)
  9 siblings, 13 replies; 120+ messages in thread
From: Tetsuya Mukawa @ 2016-01-21 11:07 UTC (permalink / raw)
  To: dev, yuanhan.liu, jianfeng.tan

virtio: Extend virtio-net PMD to support container environment

The patch adds a new virtio-net PMD configuration that allows the PMD to
work on host as if the PMD is in VM.
Here is new configuration for virtio-net PMD.
 - CONFIG_RTE_LIBRTE_VIRTIO_HOST_MODE
To use this mode, EAL needs physically contiguous memory. To allocate
such memory, add "--shm" option to application command line.

To prepare virtio-net device on host, the users need to invoke QEMU
process in special qtest mode. This mode is mainly used for testing QEMU
devices from outer process. In this mode, no guest runs.
Here is QEMU command line.

 $ qemu-system-x86_64 \
             -machine pc-i440fx-1.4,accel=qtest \
             -display none -qtest-log /dev/null \
             -qtest unix:/tmp/socket,server \
             -netdev type=tap,script=/etc/qemu-ifup,id=net0,queues=1\
             -device virtio-net-pci,netdev=net0,mq=on \
             -chardev socket,id=chr1,path=/tmp/ivshmem,server \
             -device ivshmem,size=1G,chardev=chr1,vectors=1

 * QEMU process is needed per port.
 * Virtio-1.0 device is supported.
 * In most cases, just using above command is enough.
 * The vhost backends like vhost-net and vhost-user can be specified.
 * Only checked "pc-i440fx-1.4" machine, but may work with other
   machines. It depends on a machine has piix3 south bridge.
   If the machine doesn't have, virtio-net PMD cannot receive status
   changed interrupts.
 * Should not add "--enable-kvm" to QEMU command line.

After invoking QEMU, the PMD can connect to QEMU process using unix
domain sockets. Over these sockets, virtio-net, ivshmem and piix3
device in QEMU are probed by the PMD.
Here is example of command line.

 $ testpmd -c f -n 1 -m 1024 --shm \
      --vdev="eth_virtio_net0,qtest=/tmp/socket,ivshmem=/tmp/ivshmem"\
      -- --disable-hw-vlan --txqflags=0xf00 -i

Please specify same unix domain sockets and memory size in both QEMU
and DPDK command lines like above.
The share memory size should be power of 2, because ivshmem only
accepts such memry size.

Signed-off-by: Tetsuya Mukawa <mukawa@igel.co.jp>
---
 config/common_linuxapp             |    1 +
 drivers/net/virtio/Makefile        |    4 +
 drivers/net/virtio/qtest.c         | 1237 ++++++++++++++++++++++++++++++++++++
 drivers/net/virtio/virtio_ethdev.c |  450 ++++++++++---
 drivers/net/virtio/virtio_ethdev.h |   12 +
 drivers/net/virtio/virtio_pci.c    |  190 +++++-
 drivers/net/virtio/virtio_pci.h    |   16 +
 drivers/net/virtio/virtio_rxtx.c   |    3 +-
 drivers/net/virtio/virtqueue.h     |    9 +-
 9 files changed, 1845 insertions(+), 77 deletions(-)
 create mode 100644 drivers/net/virtio/qtest.c

diff --git a/config/common_linuxapp b/config/common_linuxapp
index 74bc515..04682f6 100644
--- a/config/common_linuxapp
+++ b/config/common_linuxapp
@@ -269,6 +269,7 @@ CONFIG_RTE_LIBRTE_PMD_SZEDATA2=n
 # Compile burst-oriented VIRTIO PMD driver
 #
 CONFIG_RTE_LIBRTE_VIRTIO_PMD=y
+CONFIG_RTE_LIBRTE_VIRTIO_HOST_MODE=y
 CONFIG_RTE_LIBRTE_VIRTIO_DEBUG_INIT=n
 CONFIG_RTE_LIBRTE_VIRTIO_DEBUG_RX=n
 CONFIG_RTE_LIBRTE_VIRTIO_DEBUG_TX=n
diff --git a/drivers/net/virtio/Makefile b/drivers/net/virtio/Makefile
index 43835ba..697e629 100644
--- a/drivers/net/virtio/Makefile
+++ b/drivers/net/virtio/Makefile
@@ -52,6 +52,10 @@ SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_rxtx.c
 SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_ethdev.c
 SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_rxtx_simple.c
 
+ifeq ($(CONFIG_RTE_LIBRTE_VIRTIO_HOST_MODE),y)
+	SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += qtest.c
+endif
+
 # this lib depends upon:
 DEPDIRS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += lib/librte_eal lib/librte_ether
 DEPDIRS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += lib/librte_mempool lib/librte_mbuf
diff --git a/drivers/net/virtio/qtest.c b/drivers/net/virtio/qtest.c
new file mode 100644
index 0000000..717bee9
--- /dev/null
+++ b/drivers/net/virtio/qtest.c
@@ -0,0 +1,1237 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2015 IGEL Co., Ltd. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of IGEL Co., Ltd. nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <sys/queue.h>
+#include <signal.h>
+#include <pthread.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+
+#include <rte_memory.h>
+#include <rte_malloc.h>
+#include <rte_common.h>
+#include <rte_interrupts.h>
+
+#include "virtio_pci.h"
+#include "virtio_logs.h"
+#include "virtio_ethdev.h"
+
+#define NB_BUS                          256
+#define NB_DEVICE                       32
+#define NB_BAR                          6
+
+/* PCI common configuration registers */
+#define REG_ADDR_VENDOR_ID              0x0
+#define REG_ADDR_DEVICE_ID              0x2
+#define REG_ADDR_COMMAND                0x4
+#define REG_ADDR_STATUS                 0x6
+#define REG_ADDR_REVISION_ID            0x8
+#define REG_ADDR_CLASS_CODE             0x9
+#define REG_ADDR_CACHE_LINE_S           0xc
+#define REG_ADDR_LAT_TIMER              0xd
+#define REG_ADDR_HEADER_TYPE            0xe
+#define REG_ADDR_BIST                   0xf
+#define REG_ADDR_BAR0                   0x10
+#define REG_ADDR_BAR1                   0x14
+#define REG_ADDR_BAR2                   0x18
+#define REG_ADDR_BAR3                   0x1c
+#define REG_ADDR_BAR4                   0x20
+#define REG_ADDR_BAR5                   0x24
+
+/* PCI common configuration register values */
+#define REG_VAL_COMMAND_IO              0x1
+#define REG_VAL_COMMAND_MEMORY          0x2
+#define REG_VAL_COMMAND_MASTER          0x4
+#define REG_VAL_HEADER_TYPE_ENDPOINT    0x0
+#define REG_VAL_BAR_MEMORY              0x0
+#define REG_VAL_BAR_IO                  0x1
+#define REG_VAL_BAR_LOCATE_32           0x0
+#define REG_VAL_BAR_LOCATE_UNDER_1MB    0x2
+#define REG_VAL_BAR_LOCATE_64           0x4
+
+/* PIIX3 configuration registers */
+#define PIIX3_REG_ADDR_PIRQA            0x60
+#define PIIX3_REG_ADDR_PIRQB            0x61
+#define PIIX3_REG_ADDR_PIRQC            0x62
+#define PIIX3_REG_ADDR_PIRQD            0x63
+
+/* Device information */
+#define VIRTIO_NET_DEVICE_ID            0x1000
+#define VIRTIO_NET_VENDOR_ID            0x1af4
+#define VIRTIO_NET_IO_START             0xc000
+#define VIRTIO_NET_MEMORY1_START	0x1000000000
+#define VIRTIO_NET_MEMORY2_START	0x2000000000
+#define VIRTIO_NET_IRQ_NUM              10
+#define IVSHMEM_DEVICE_ID               0x1110
+#define IVSHMEM_VENDOR_ID               0x1af4
+#define IVSHMEM_MEMORY_START            0x3000000000
+#define IVSHMEM_PROTOCOL_VERSION        0
+#define PIIX3_DEVICE_ID                 0x7000
+#define PIIX3_VENDOR_ID                 0x8086
+
+#define PCI_CONFIG_ADDR(_bus, _device, _function, _offset) ( \
+	(1 << 31) | ((_bus) & 0xff) << 16 | ((_device) & 0x1f) << 11 | \
+	((_function) & 0xf) << 8 | ((_offset) & 0xfc))
+
+static char interrupt_message[32];
+
+enum qtest_pci_bar_type {
+	QTEST_PCI_BAR_DISABLE = 0,
+	QTEST_PCI_BAR_IO,
+	QTEST_PCI_BAR_MEMORY_UNDER_1MB,
+	QTEST_PCI_BAR_MEMORY_32,
+	QTEST_PCI_BAR_MEMORY_64
+};
+
+struct qtest_pci_bar {
+	enum qtest_pci_bar_type type;
+	uint8_t addr;
+	uint64_t region_start;
+	uint64_t region_size;
+};
+
+struct qtest_session;
+TAILQ_HEAD(qtest_pci_device_list, qtest_pci_device);
+struct qtest_pci_device {
+	TAILQ_ENTRY(qtest_pci_device) next;
+	const char *name;
+	uint16_t device_id;
+	uint16_t vendor_id;
+	uint8_t bus_addr;
+	uint8_t device_addr;
+	struct qtest_pci_bar bar[NB_BAR];
+	int (*init)(struct qtest_session *s, struct qtest_pci_device *dev);
+};
+
+union qtest_pipefds {
+	struct {
+		int pipefd[2];
+	};
+	struct {
+		int readfd;
+		int writefd;
+	};
+};
+
+struct qtest_session {
+	int qtest_socket;
+	pthread_mutex_t qtest_session_lock;
+
+	struct qtest_pci_device_list head;
+	int ivshmem_socket;
+
+	pthread_t event_th;
+	union qtest_pipefds msgfds;
+
+	pthread_t intr_th;
+	union qtest_pipefds irqfds;
+	rte_atomic16_t enable_intr;
+	rte_intr_callback_fn cb;
+	void *cb_arg;
+};
+
+static int
+qtest_raw_send(int fd, char *buf, size_t count)
+{
+	size_t len = count;
+	size_t total_len = 0;
+	int ret = 0;
+
+	while (len > 0) {
+		ret = write(fd, buf, len);
+		if (ret == (int)len)
+			break;
+		if (ret == -1) {
+			if (errno == EINTR)
+				continue;
+			return ret;
+		}
+		total_len += ret;
+		buf += ret;
+		len -= ret;
+	}
+	return total_len + ret;
+}
+
+static int
+qtest_raw_recv(int fd, char *buf, size_t count)
+{
+	size_t len = count;
+	size_t total_len = 0;
+	int ret = 0;
+
+	while (len > 0) {
+		ret = read(fd, buf, len);
+		if (ret == (int)len)
+			break;
+		if (*(buf + ret - 1) == '\n')
+			break;
+		if (ret == -1) {
+			if (errno == EINTR)
+				continue;
+			return ret;
+		}
+		total_len += ret;
+		buf += ret;
+		len -= ret;
+	}
+	return total_len + ret;
+}
+
+/*
+ * To know QTest protocol specification, see below QEMU source code.
+ *  - qemu/qtest.c
+ * If qtest socket is closed, qtest_raw_in and qtest_raw_read will return 0.
+ */
+static uint32_t
+qtest_raw_in(struct qtest_session *s, uint16_t addr, char type)
+{
+	char buf[1024];
+	int ret;
+
+	if ((type != 'l') && (type != 'w') && (type != 'b'))
+		rte_panic("Invalid value\n");
+
+	snprintf(buf, sizeof(buf), "in%c 0x%x\n", type, addr);
+	/* write to qtest socket */
+	ret = qtest_raw_send(s->qtest_socket, buf, strlen(buf));
+	/* read reply from event handler */
+	ret = qtest_raw_recv(s->msgfds.readfd, buf, sizeof(buf));
+	if (ret < 0)
+		return 0;
+
+	buf[ret] = '\0';
+	return strtoul(buf + strlen("OK "), NULL, 16);
+}
+
+static void
+qtest_raw_out(struct qtest_session *s, uint16_t addr, uint32_t val, char type)
+{
+	char buf[1024];
+	int ret __rte_unused;
+
+	if ((type != 'l') && (type != 'w') && (type != 'b'))
+		rte_panic("Invalid value\n");
+
+	snprintf(buf, sizeof(buf), "out%c 0x%x 0x%x\n", type, addr, val);
+	/* write to qtest socket */
+	ret = qtest_raw_send(s->qtest_socket, buf, strlen(buf));
+	/* read reply from event handler */
+	ret = qtest_raw_recv(s->msgfds.readfd, buf, sizeof(buf));
+}
+
+static uint32_t
+qtest_raw_read(struct qtest_session *s, uint64_t addr, char type)
+{
+	char buf[1024];
+	int ret;
+
+	if ((type != 'l') && (type != 'w') && (type != 'b'))
+		rte_panic("Invalid value\n");
+
+	snprintf(buf, sizeof(buf), "read%c 0x%lx\n", type, addr);
+	/* write to qtest socket */
+	ret = qtest_raw_send(s->qtest_socket, buf, strlen(buf));
+	/* read reply from event handler */
+	ret = qtest_raw_recv(s->msgfds.readfd, buf, sizeof(buf));
+	if (ret < 0)
+		return 0;
+
+	buf[ret] = '\0';
+	return strtoul(buf + strlen("OK "), NULL, 16);
+}
+
+static void
+qtest_raw_write(struct qtest_session *s, uint64_t addr, uint32_t val, char type)
+{
+	char buf[1024];
+	int ret __rte_unused;
+
+	if ((type != 'l') && (type != 'w') && (type != 'b'))
+		rte_panic("Invalid value\n");
+
+	snprintf(buf, sizeof(buf), "write%c 0x%lx 0x%x\n", type, addr, val);
+	/* write to qtest socket */
+	ret = qtest_raw_send(s->qtest_socket, buf, strlen(buf));
+	/* read reply from event handler */
+	ret = qtest_raw_recv(s->msgfds.readfd, buf, sizeof(buf));
+}
+
+/*
+ * qtest_pci_inX/outX are used for accessing PCI configuration space.
+ * The functions are implemented based on PCI configuration space
+ * specification.
+ * Accroding to the spec, access size of read()/write() should be 4 bytes.
+ */
+static int
+qtest_pci_inb(struct qtest_session *s, uint8_t bus, uint8_t device,
+		uint8_t function, uint8_t offset)
+{
+	uint32_t tmp;
+
+	tmp = PCI_CONFIG_ADDR(bus, device, function, offset);
+
+	if (pthread_mutex_lock(&s->qtest_session_lock) < 0)
+		rte_panic("Cannot lock mutex\n");
+
+	qtest_raw_out(s, 0xcf8, tmp, 'l');
+	tmp = qtest_raw_in(s, 0xcfc, 'l');
+
+	if (pthread_mutex_unlock(&s->qtest_session_lock) < 0)
+		rte_panic("Cannot unlock mutex\n");
+
+	return (tmp >> ((offset & 0x3) * 8)) & 0xff;
+}
+
+static void
+qtest_pci_outb(struct qtest_session *s, uint8_t bus, uint8_t device,
+		uint8_t function, uint8_t offset, uint8_t value)
+{
+	uint32_t addr, tmp, pos;
+
+	addr = PCI_CONFIG_ADDR(bus, device, function, offset);
+	pos = (offset % 4) * 8;
+
+	if (pthread_mutex_lock(&s->qtest_session_lock) < 0)
+		rte_panic("Cannot lock mutex\n");
+
+	qtest_raw_out(s, 0xcf8, addr, 'l');
+	tmp = qtest_raw_in(s, 0xcfc, 'l');
+	tmp = (tmp & ~(0xff << pos)) | (value << pos);
+
+	qtest_raw_out(s, 0xcf8, addr, 'l');
+	qtest_raw_out(s, 0xcfc, tmp, 'l');
+
+	if (pthread_mutex_unlock(&s->qtest_session_lock) < 0)
+		rte_panic("Cannot unlock mutex\n");
+}
+
+static uint32_t
+qtest_pci_inl(struct qtest_session *s, uint8_t bus, uint8_t device,
+		uint8_t function, uint8_t offset)
+{
+	uint32_t tmp;
+
+	tmp = PCI_CONFIG_ADDR(bus, device, function, offset);
+
+	if (pthread_mutex_lock(&s->qtest_session_lock) < 0)
+		rte_panic("Cannot lock mutex\n");
+
+	qtest_raw_out(s, 0xcf8, tmp, 'l');
+	tmp = qtest_raw_in(s, 0xcfc, 'l');
+
+	if (pthread_mutex_unlock(&s->qtest_session_lock) < 0)
+		rte_panic("Cannot unlock mutex\n");
+
+	return tmp;
+}
+
+static void
+qtest_pci_outl(struct qtest_session *s, uint8_t bus, uint8_t device,
+		uint8_t function, uint8_t offset, uint32_t value)
+{
+	uint32_t tmp;
+
+	tmp = PCI_CONFIG_ADDR(bus, device, function, offset);
+
+	if (pthread_mutex_lock(&s->qtest_session_lock) < 0)
+		rte_panic("Cannot lock mutex\n");
+
+	qtest_raw_out(s, 0xcf8, tmp, 'l');
+	qtest_raw_out(s, 0xcfc, value, 'l');
+
+	if (pthread_mutex_unlock(&s->qtest_session_lock) < 0)
+		rte_panic("Cannot unlock mutex\n");
+}
+
+static uint64_t
+qtest_pci_inq(struct qtest_session *s, uint8_t bus, uint8_t device,
+		uint8_t function, uint8_t offset)
+{
+	uint32_t tmp;
+	uint64_t val;
+
+	tmp = PCI_CONFIG_ADDR(bus, device, function, offset);
+
+	if (pthread_mutex_lock(&s->qtest_session_lock) < 0)
+		rte_panic("Cannot lock mutex\n");
+
+	qtest_raw_out(s, 0xcf8, tmp, 'l');
+	val = (uint64_t)qtest_raw_in(s, 0xcfc, 'l');
+
+	tmp = PCI_CONFIG_ADDR(bus, device, function, offset + 4);
+
+	qtest_raw_out(s, 0xcf8, tmp, 'l');
+	val |= (uint64_t)qtest_raw_in(s, 0xcfc, 'l') << 32;
+
+	if (pthread_mutex_unlock(&s->qtest_session_lock) < 0)
+		rte_panic("Cannot unlock mutex\n");
+
+	return val;
+}
+
+static void
+qtest_pci_outq(struct qtest_session *s, uint8_t bus, uint8_t device,
+		uint8_t function, uint8_t offset, uint64_t value)
+{
+	uint32_t tmp;
+
+	tmp = PCI_CONFIG_ADDR(bus, device, function, offset);
+
+	if (pthread_mutex_lock(&s->qtest_session_lock) < 0)
+		rte_panic("Cannot lock mutex\n");
+
+	qtest_raw_out(s, 0xcf8, tmp, 'l');
+	qtest_raw_out(s, 0xcfc, (uint32_t)(value & 0xffffffff), 'l');
+
+	tmp = PCI_CONFIG_ADDR(bus, device, function, offset + 4);
+
+	qtest_raw_out(s, 0xcf8, tmp, 'l');
+	qtest_raw_out(s, 0xcfc, (uint32_t)(value >> 32), 'l');
+
+	if (pthread_mutex_unlock(&s->qtest_session_lock) < 0)
+		rte_panic("Cannot unlock mutex\n");
+}
+
+/*
+ * qtest_in/out are used for accessing ioport of qemu guest.
+ * qtest_read/write are used for accessing memory of qemu guest.
+ */
+uint32_t
+qtest_in(struct virtio_hw *hw, uint16_t addr, char type)
+{
+	struct qtest_session *s = (struct qtest_session *)hw->qsession;
+	uint32_t val;
+
+	if (pthread_mutex_lock(&s->qtest_session_lock) < 0)
+		rte_panic("Cannot lock mutex\n");
+
+	val = qtest_raw_in(s, addr, type);
+
+	if (pthread_mutex_unlock(&s->qtest_session_lock) < 0)
+		rte_panic("Cannot lock mutex\n");
+
+	return val;
+}
+
+void
+qtest_out(struct virtio_hw *hw, uint16_t addr, uint64_t val, char type)
+{
+	struct qtest_session *s = (struct qtest_session *)hw->qsession;
+
+	if (pthread_mutex_lock(&s->qtest_session_lock) < 0)
+		rte_panic("Cannot lock mutex\n");
+
+	qtest_raw_out(s, addr, val, type);
+
+	if (pthread_mutex_unlock(&s->qtest_session_lock) < 0)
+		rte_panic("Cannot lock mutex\n");
+}
+
+uint32_t
+qtest_read(struct virtio_hw *hw, uint64_t addr, char type)
+{
+	struct qtest_session *s = (struct qtest_session *)hw->qsession;
+	uint32_t val;
+
+	if (pthread_mutex_lock(&s->qtest_session_lock) < 0)
+		rte_panic("Cannot lock mutex\n");
+
+	val = qtest_raw_read(s, addr, type);
+
+	if (pthread_mutex_unlock(&s->qtest_session_lock) < 0)
+		rte_panic("Cannot lock mutex\n");
+
+	return val;
+}
+
+void
+qtest_write(struct virtio_hw *hw, uint64_t addr, uint64_t val, char type)
+{
+	struct qtest_session *s = (struct qtest_session *)hw->qsession;
+
+	if (pthread_mutex_lock(&s->qtest_session_lock) < 0)
+		rte_panic("Cannot lock mutex\n");
+
+	qtest_raw_write(s, addr, val, type);
+
+	if (pthread_mutex_unlock(&s->qtest_session_lock) < 0)
+		rte_panic("Cannot lock mutex\n");
+}
+
+static struct qtest_pci_device *
+qtest_find_device(struct qtest_session *s, const char *name)
+{
+	struct qtest_pci_device *dev;
+
+	TAILQ_FOREACH(dev, &s->head, next) {
+		if (strcmp(dev->name, name) == 0)
+			return dev;
+	}
+	return NULL;
+}
+
+/*
+ * The function is used for reading pci configuration space of specifed device.
+ */
+int
+qtest_read_pci_cfg(struct virtio_hw *hw, const char *name,
+		void *buf, size_t len, off_t offset)
+{
+	struct qtest_session *s = (struct qtest_session *)hw->qsession;
+	struct qtest_pci_device *dev;
+	uint32_t i;
+	uint8_t *p = buf;
+
+	dev = qtest_find_device(s, name);
+	if (dev == NULL) {
+		PMD_DRV_LOG(ERR, "Cannot find specified device: %s\n", name);
+		return -1;
+	}
+
+	for (i = 0; i < len; i++) {
+		*(p + i) = qtest_pci_inb(s,
+				dev->bus_addr, dev->device_addr, 0, offset + i);
+	}
+
+	return 0;
+}
+
+static struct qtest_pci_bar *
+qtest_get_bar(struct virtio_hw *hw, const char *name, uint8_t bar)
+{
+	struct qtest_session *s = (struct qtest_session *)hw->qsession;
+	struct qtest_pci_device *dev;
+
+	if (bar >= NB_BAR) {
+		PMD_DRV_LOG(ERR, "Invalid bar is specified: %u\n", bar);
+		return NULL;
+	}
+
+	dev = qtest_find_device(s, name);
+	if (dev == NULL) {
+		PMD_DRV_LOG(ERR, "Cannot find specified device: %s\n", name);
+		return NULL;
+	}
+
+	if (dev->bar[bar].type == QTEST_PCI_BAR_DISABLE) {
+		PMD_DRV_LOG(ERR, "Cannot find valid BAR(%s): %u\n", name, bar);
+		return NULL;
+	}
+
+	return &dev->bar[bar];
+}
+
+int
+qtest_get_bar_addr(struct virtio_hw *hw, const char *name,
+		uint8_t bar, uint64_t *addr)
+{
+	struct qtest_pci_bar *bar_ptr;
+
+	bar_ptr = qtest_get_bar(hw, name, bar);
+	if (bar_ptr == NULL)
+		return -1;
+
+	*addr = bar_ptr->region_start;
+	return 0;
+}
+
+int
+qtest_get_bar_size(struct virtio_hw *hw, const char *name,
+		uint8_t bar, uint64_t *size)
+{
+	struct qtest_pci_bar *bar_ptr;
+
+	bar_ptr = qtest_get_bar(hw, name, bar);
+	if (bar_ptr == NULL)
+		return -1;
+
+	*size = bar_ptr->region_size;
+	return 0;
+}
+
+int
+qtest_intr_enable(void *data)
+{
+	struct virtio_hw *hw = ((struct rte_eth_dev_data *)data)->dev_private;
+	struct qtest_session *s;
+
+	s = (struct qtest_session *)hw->qsession;
+	rte_atomic16_set(&s->enable_intr, 1);
+
+	return 0;
+}
+
+int
+qtest_intr_disable(void *data)
+{
+	struct virtio_hw *hw = ((struct rte_eth_dev_data *)data)->dev_private;
+	struct qtest_session *s;
+
+	s = (struct qtest_session *)hw->qsession;
+	rte_atomic16_set(&s->enable_intr, 0);
+
+	return 0;
+}
+
+void
+qtest_intr_callback_register(void *data,
+		rte_intr_callback_fn cb, void *cb_arg)
+{
+	struct virtio_hw *hw = ((struct rte_eth_dev_data *)data)->dev_private;
+	struct qtest_session *s;
+
+	s = (struct qtest_session *)hw->qsession;
+	s->cb = cb;
+	s->cb_arg = cb_arg;
+	rte_atomic16_set(&s->enable_intr, 1);
+}
+
+void
+qtest_intr_callback_unregister(void *data,
+		rte_intr_callback_fn cb __rte_unused,
+		void *cb_arg __rte_unused)
+{
+	struct virtio_hw *hw = ((struct rte_eth_dev_data *)data)->dev_private;
+	struct qtest_session *s;
+
+	s = (struct qtest_session *)hw->qsession;
+	rte_atomic16_set(&s->enable_intr, 0);
+	s->cb = NULL;
+	s->cb_arg = NULL;
+}
+
+static void *
+qtest_intr_handler(void *data) {
+	struct qtest_session *s = (struct qtest_session *)data;
+	char buf[1];
+	int ret;
+
+	for (;;) {
+		ret = qtest_raw_recv(s->irqfds.readfd, buf, sizeof(buf));
+		if (ret < 0)
+			return NULL;
+		s->cb(NULL, s->cb_arg);
+	}
+	return NULL;
+}
+
+static int
+qtest_intr_initialize(void *data)
+{
+	struct virtio_hw *hw = ((struct rte_eth_dev_data *)data)->dev_private;
+	struct qtest_session *s;
+	char buf[1024];
+	int ret;
+
+	s = (struct qtest_session *)hw->qsession;
+
+	/* This message will come when interrupt occurs */
+	snprintf(interrupt_message, sizeof(interrupt_message),
+			"IRQ raise %d", VIRTIO_NET_IRQ_NUM);
+
+	snprintf(buf, sizeof(buf), "irq_intercept_in ioapic\n");
+
+	if (pthread_mutex_lock(&s->qtest_session_lock) < 0)
+		rte_panic("Cannot lock mutex\n");
+
+	/* To enable interrupt, send "irq_intercept_in" message to QEMU */
+	ret = qtest_raw_send(s->qtest_socket, buf, strlen(buf));
+	if (ret < 0) {
+		pthread_mutex_unlock(&s->qtest_session_lock);
+		return -1;
+	}
+
+	/* just ignore QEMU response */
+	ret = qtest_raw_recv(s->msgfds.readfd, buf, sizeof(buf));
+	if (ret < 0) {
+		pthread_mutex_unlock(&s->qtest_session_lock);
+		return -1;
+	}
+
+	if (pthread_mutex_unlock(&s->qtest_session_lock) < 0)
+		rte_panic("Cannot lock mutex\n");
+
+	return 0;
+}
+
+static void
+qtest_handle_one_message(struct qtest_session *s, char *buf)
+{
+	int ret;
+
+	if (strncmp(buf, interrupt_message, strlen(interrupt_message)) == 0) {
+		if (rte_atomic16_read(&s->enable_intr) == 0)
+			return;
+
+		/* relay interrupt to pipe */
+		ret = write(s->irqfds.writefd, "1", 1);
+		if (ret < 0)
+			rte_panic("cannot relay interrupt\n");
+	} else {
+		/* relay normal message to pipe */
+		ret = qtest_raw_send(s->msgfds.writefd, buf, strlen(buf));
+		if (ret < 0)
+			rte_panic("cannot relay normal message\n");
+	}
+}
+
+static char *
+qtest_get_next_message(char *p)
+{
+	p = strchr(p, '\n');
+	if ((p == NULL) || (*(p + 1) == '\0'))
+		return NULL;
+	return p + 1;
+}
+
+static void
+qtest_close_one_socket(int *fd)
+{
+	if (*fd > 0) {
+		close(*fd);
+		*fd = -1;
+	}
+}
+
+static void
+qtest_close_sockets(struct qtest_session *s)
+{
+	qtest_close_one_socket(&s->qtest_socket);
+	qtest_close_one_socket(&s->msgfds.readfd);
+	qtest_close_one_socket(&s->msgfds.writefd);
+	qtest_close_one_socket(&s->irqfds.readfd);
+	qtest_close_one_socket(&s->irqfds.writefd);
+	qtest_close_one_socket(&s->ivshmem_socket);
+}
+
+/*
+ * This thread relays QTest response using pipe.
+ * The function is needed because we need to separate IRQ message from others.
+ */
+static void *
+qtest_event_handler(void *data) {
+	struct qtest_session *s = (struct qtest_session *)data;
+	char buf[1024];
+	char *p;
+	int ret;
+
+	for (;;) {
+		memset(buf, 0, sizeof(buf));
+		ret = qtest_raw_recv(s->qtest_socket, buf, sizeof(buf));
+		if (ret < 0) {
+			qtest_close_sockets(s);
+			return NULL;
+		}
+
+		/* may receive multiple messages at the same time */
+		p = buf;
+		do {
+			qtest_handle_one_message(s, p);
+		} while ((p = qtest_get_next_message(p)) != NULL);
+	}
+	return NULL;
+}
+
+static int
+qtest_init_piix3_device(struct qtest_session *s, struct qtest_pci_device *dev)
+{
+	uint8_t bus, device, virtio_net_slot = 0;
+	struct qtest_pci_device *tmpdev;
+	uint8_t pcislot2regaddr[] = {	0xff,
+					0xff,
+					0xff,
+					PIIX3_REG_ADDR_PIRQC,
+					PIIX3_REG_ADDR_PIRQD,
+					PIIX3_REG_ADDR_PIRQA,
+					PIIX3_REG_ADDR_PIRQB};
+
+	bus = dev->bus_addr;
+	device = dev->device_addr;
+
+	PMD_DRV_LOG(INFO,
+		"Find %s on virtual PCI bus: %04x:%02x:00.0\n",
+		dev->name, bus, device);
+
+	/* Get slot id that is connected to virtio-net */
+	TAILQ_FOREACH(tmpdev, &s->head, next) {
+		if (strcmp(tmpdev->name, "virtio-net") == 0) {
+			virtio_net_slot = tmpdev->device_addr;
+			break;
+		}
+	}
+
+	if (virtio_net_slot == 0)
+		return -1;
+
+	/*
+	 * Set interrupt routing for virtio-net device.
+	 * Here is i440fx/piix3 connection settings
+	 * ---------------------------------------
+	 * PCI Slot3 -> PIRQC
+	 * PCI Slot4 -> PIRQD
+	 * PCI Slot5 -> PIRQA
+	 * PCI Slot6 -> PIRQB
+	 */
+	if (pcislot2regaddr[virtio_net_slot] != 0xff) {
+		qtest_pci_outb(s, bus, device, 0,
+				pcislot2regaddr[virtio_net_slot],
+				VIRTIO_NET_IRQ_NUM);
+	}
+
+	return 0;
+}
+
+/*
+ * Common initialization of PCI device.
+ * To know detail, see pci specification.
+ */
+static int
+qtest_init_pci_device(struct qtest_session *s, struct qtest_pci_device *dev)
+{
+	uint8_t i, bus, device;
+	uint32_t val;
+	uint64_t val64;
+
+	bus = dev->bus_addr;
+	device = dev->device_addr;
+
+	PMD_DRV_LOG(INFO,
+		"Find %s on virtual PCI bus: %04x:%02x:00.0\n",
+		dev->name, bus, device);
+
+	/* Check header type */
+	val = qtest_pci_inb(s, bus, device, 0, REG_ADDR_HEADER_TYPE);
+	if (val != REG_VAL_HEADER_TYPE_ENDPOINT) {
+		PMD_DRV_LOG(ERR, "Unexpected header type %d\n", val);
+		return -1;
+	}
+
+	/* Check BAR type */
+	for (i = 0; i < NB_BAR; i++) {
+		val = qtest_pci_inl(s, bus, device, 0, dev->bar[i].addr);
+
+		switch (dev->bar[i].type) {
+		case QTEST_PCI_BAR_IO:
+			if ((val & 0x1) != REG_VAL_BAR_IO)
+				dev->bar[i].type = QTEST_PCI_BAR_DISABLE;
+			break;
+		case QTEST_PCI_BAR_MEMORY_UNDER_1MB:
+			if ((val & 0x1) != REG_VAL_BAR_MEMORY)
+				dev->bar[i].type = QTEST_PCI_BAR_DISABLE;
+			if ((val & 0x6) != REG_VAL_BAR_LOCATE_UNDER_1MB)
+				dev->bar[i].type = QTEST_PCI_BAR_DISABLE;
+			break;
+		case QTEST_PCI_BAR_MEMORY_32:
+			if ((val & 0x1) != REG_VAL_BAR_MEMORY)
+				dev->bar[i].type = QTEST_PCI_BAR_DISABLE;
+			if ((val & 0x6) != REG_VAL_BAR_LOCATE_32)
+				dev->bar[i].type = QTEST_PCI_BAR_DISABLE;
+			break;
+		case QTEST_PCI_BAR_MEMORY_64:
+
+			if ((val & 0x1) != REG_VAL_BAR_MEMORY)
+				dev->bar[i].type = QTEST_PCI_BAR_DISABLE;
+			if ((val & 0x6) != REG_VAL_BAR_LOCATE_64)
+				dev->bar[i].type = QTEST_PCI_BAR_DISABLE;
+			break;
+		case QTEST_PCI_BAR_DISABLE:
+			break;
+		}
+	}
+
+	/* Enable device */
+	val = qtest_pci_inl(s, bus, device, 0, REG_ADDR_COMMAND);
+	val |= REG_VAL_COMMAND_IO | REG_VAL_COMMAND_MEMORY | REG_VAL_COMMAND_MASTER;
+	qtest_pci_outl(s, bus, device, 0, REG_ADDR_COMMAND, val);
+
+	/* Calculate BAR size */
+	for (i = 0; i < NB_BAR; i++) {
+		switch (dev->bar[i].type) {
+		case QTEST_PCI_BAR_IO:
+		case QTEST_PCI_BAR_MEMORY_UNDER_1MB:
+		case QTEST_PCI_BAR_MEMORY_32:
+			qtest_pci_outl(s, bus, device, 0,
+					dev->bar[i].addr, 0xffffffff);
+			val = qtest_pci_inl(s, bus, device,
+					0, dev->bar[i].addr);
+			dev->bar[i].region_size = ~(val & 0xfffffff0) + 1;
+			break;
+		case QTEST_PCI_BAR_MEMORY_64:
+			qtest_pci_outq(s, bus, device, 0,
+					dev->bar[i].addr, 0xffffffffffffffff);
+			val64 = qtest_pci_inq(s, bus, device,
+					0, dev->bar[i].addr);
+			dev->bar[i].region_size =
+					~(val64 & 0xfffffffffffffff0) + 1;
+			break;
+		case QTEST_PCI_BAR_DISABLE:
+			break;
+		}
+	}
+
+	/* Set BAR region */
+	for (i = 0; i < NB_BAR; i++) {
+		switch (dev->bar[i].type) {
+		case QTEST_PCI_BAR_IO:
+		case QTEST_PCI_BAR_MEMORY_UNDER_1MB:
+		case QTEST_PCI_BAR_MEMORY_32:
+			qtest_pci_outl(s, bus, device, 0, dev->bar[i].addr,
+				dev->bar[i].region_start);
+			PMD_DRV_LOG(INFO, "Set BAR of %s device: 0x%lx - 0x%lx\n",
+				dev->name, dev->bar[i].region_start,
+				dev->bar[i].region_start + dev->bar[i].region_size);
+			break;
+		case QTEST_PCI_BAR_MEMORY_64:
+			qtest_pci_outq(s, bus, device, 0, dev->bar[i].addr,
+				dev->bar[i].region_start);
+			PMD_DRV_LOG(INFO, "Set BAR of %s device: 0x%lx - 0x%lx\n",
+				dev->name, dev->bar[i].region_start,
+				dev->bar[i].region_start + dev->bar[i].region_size);
+			break;
+		case QTEST_PCI_BAR_DISABLE:
+			break;
+		}
+	}
+
+	return 0;
+}
+
+static void
+qtest_find_pci_device(struct qtest_session *s, uint16_t bus, uint8_t device)
+{
+	struct qtest_pci_device *dev;
+	uint32_t val;
+
+	val = qtest_pci_inl(s, bus, device, 0, 0);
+	TAILQ_FOREACH(dev, &s->head, next) {
+		if (val == ((uint32_t)dev->device_id << 16 | dev->vendor_id)) {
+			dev->bus_addr = bus;
+			dev->device_addr = device;
+			return;
+		}
+
+	}
+}
+
+static int
+qtest_init_pci_devices(struct qtest_session *s)
+{
+	struct qtest_pci_device *dev;
+	uint16_t bus;
+	uint8_t device;
+	int ret;
+
+	/* Find devices */
+	bus = 0;
+	do {
+		device = 0;
+		do {
+			qtest_find_pci_device(s, bus, device);
+		} while (device++ != NB_DEVICE - 1);
+	} while (bus++ != NB_BUS - 1);
+
+	/* Initialize devices */
+	TAILQ_FOREACH(dev, &s->head, next) {
+		ret = dev->init(s, dev);
+		if (ret != 0)
+			return ret;
+	}
+
+	return 0;
+}
+
+struct rte_pci_id
+qtest_get_pci_id_of_virtio_net(void)
+{
+	struct rte_pci_id id =  {VIRTIO_NET_DEVICE_ID,
+		VIRTIO_NET_VENDOR_ID, PCI_ANY_ID, PCI_ANY_ID};
+
+	return id;
+}
+
+static int
+qtest_register_target_devices(struct qtest_session *s)
+{
+	struct qtest_pci_device *virtio_net, *ivshmem, *piix3;
+	const struct rte_memseg *ms;
+
+	ms = rte_eal_get_physmem_layout();
+	/* if EAL memory size isn't pow of 2, ivshmem will refuse it */
+	if ((ms[0].len & (ms[0].len - 1)) != 0) {
+		PMD_DRV_LOG(ERR, "memory size must be power of 2\n");
+		return -1;
+	}
+
+	virtio_net = malloc(sizeof(*virtio_net));
+	if (virtio_net == NULL)
+		return -1;
+
+	ivshmem = malloc(sizeof(*ivshmem));
+	if (ivshmem == NULL)
+		return -1;
+
+	piix3 = malloc(sizeof(*piix3));
+	if (piix3 == NULL)
+		return -1;
+
+	memset(virtio_net, 0, sizeof(*virtio_net));
+	memset(ivshmem, 0, sizeof(*ivshmem));
+
+	TAILQ_INIT(&s->head);
+
+	virtio_net->name = "virtio-net";
+	virtio_net->device_id = VIRTIO_NET_DEVICE_ID;
+	virtio_net->vendor_id = VIRTIO_NET_VENDOR_ID;
+	virtio_net->init = qtest_init_pci_device;
+	virtio_net->bar[0].addr = REG_ADDR_BAR0;
+	virtio_net->bar[0].type = QTEST_PCI_BAR_IO;
+	virtio_net->bar[0].region_start = VIRTIO_NET_IO_START;
+	virtio_net->bar[1].addr = REG_ADDR_BAR1;
+	virtio_net->bar[1].type = QTEST_PCI_BAR_MEMORY_32;
+	virtio_net->bar[1].region_start = VIRTIO_NET_MEMORY1_START;
+	virtio_net->bar[4].addr = REG_ADDR_BAR4;
+	virtio_net->bar[4].type = QTEST_PCI_BAR_MEMORY_64;
+	virtio_net->bar[4].region_start = VIRTIO_NET_MEMORY2_START;
+	TAILQ_INSERT_TAIL(&s->head, virtio_net, next);
+
+	ivshmem->name = "ivshmem";
+	ivshmem->device_id = IVSHMEM_DEVICE_ID;
+	ivshmem->vendor_id = IVSHMEM_VENDOR_ID;
+	ivshmem->init = qtest_init_pci_device;
+	ivshmem->bar[0].addr = REG_ADDR_BAR0;
+	ivshmem->bar[0].type = QTEST_PCI_BAR_MEMORY_32;
+	ivshmem->bar[0].region_start = IVSHMEM_MEMORY_START;
+	ivshmem->bar[2].addr = REG_ADDR_BAR2;
+	ivshmem->bar[2].type = QTEST_PCI_BAR_MEMORY_64;
+	/* In host mode, only one memory segment is vaild */
+	ivshmem->bar[2].region_start = (uint64_t)ms[0].addr;
+	TAILQ_INSERT_TAIL(&s->head, ivshmem, next);
+
+	/* piix3 is needed to route irqs from virtio-net to ioapic */
+	piix3->name = "piix3";
+	piix3->device_id = PIIX3_DEVICE_ID;
+	piix3->vendor_id = PIIX3_VENDOR_ID;
+	piix3->init = qtest_init_piix3_device;
+	TAILQ_INSERT_TAIL(&s->head, piix3, next);
+
+	return 0;
+}
+
+static int
+qtest_send_message_to_ivshmem(int sock_fd, uint64_t client_id, int shm_fd)
+{
+	struct iovec iov;
+	struct msghdr msgh;
+	size_t fdsize = sizeof(int);
+	char control[CMSG_SPACE(fdsize)];
+	struct cmsghdr *cmsg;
+	int ret;
+
+	memset(&msgh, 0, sizeof(msgh));
+	iov.iov_base = &client_id;
+	iov.iov_len = sizeof(client_id);
+
+	msgh.msg_iov = &iov;
+	msgh.msg_iovlen = 1;
+
+	if (shm_fd >= 0) {
+		msgh.msg_control = &control;
+		msgh.msg_controllen = sizeof(control);
+		cmsg = CMSG_FIRSTHDR(&msgh);
+		cmsg->cmsg_len = CMSG_LEN(fdsize);
+		cmsg->cmsg_level = SOL_SOCKET;
+		cmsg->cmsg_type = SCM_RIGHTS;
+		memcpy(CMSG_DATA(cmsg), &shm_fd, fdsize);
+	}
+
+	do {
+		ret = sendmsg(sock_fd, &msgh, 0);
+	} while (ret < 0 && errno == EINTR);
+
+	if (ret < 0) {
+		PMD_DRV_LOG(ERR, "sendmsg error\n");
+		return ret;
+	}
+
+	return ret;
+}
+
+static int
+qtest_setup_shared_memory(struct qtest_session *s)
+{
+	int shm_fd, ret;
+
+	rte_memseg_info_get(0, &shm_fd, NULL, NULL);
+
+	/* send our protocol version first */
+	ret = qtest_send_message_to_ivshmem(s->ivshmem_socket,
+			IVSHMEM_PROTOCOL_VERSION, -1);
+	if (ret < 0) {
+		PMD_DRV_LOG(ERR,
+			"Failed to send protocol version to ivshmem\n");
+		return -1;
+	}
+
+	/* send client id */
+	ret = qtest_send_message_to_ivshmem(s->ivshmem_socket, 0, -1);
+	if (ret < 0) {
+		PMD_DRV_LOG(ERR, "Failed to send VMID to ivshmem\n");
+		return -1;
+	}
+
+	/* send message to ivshmem */
+	ret = qtest_send_message_to_ivshmem(s->ivshmem_socket, -1, shm_fd);
+	if (ret < 0) {
+		PMD_DRV_LOG(ERR, "Failed to file descriptor to ivshmem\n");
+		return -1;
+	}
+
+	/* close EAL memory again */
+	close(shm_fd);
+
+	return 0;
+}
+
+int
+qtest_vdev_init(struct rte_eth_dev_data *data,
+		int qtest_socket, int ivshmem_socket)
+{
+	struct virtio_hw *hw = ((struct rte_eth_dev_data *)data)->dev_private;
+	struct qtest_session *s;
+	int ret;
+
+	s = rte_zmalloc(NULL, sizeof(*s), RTE_CACHE_LINE_SIZE);
+
+	ret = pipe(s->msgfds.pipefd);
+	if (ret != 0) {
+		PMD_DRV_LOG(ERR, "Failed to initialize message pipe\n");
+		return -1;
+	}
+
+	ret = pipe(s->irqfds.pipefd);
+	if (ret != 0) {
+		PMD_DRV_LOG(ERR, "Failed to initialize irq pipe\n");
+		return -1;
+	}
+
+	ret = qtest_register_target_devices(s);
+	if (ret != 0) {
+		PMD_DRV_LOG(ERR, "Failed to initialize qtest session\n");
+		return -1;
+	}
+
+	ret = pthread_mutex_init(&s->qtest_session_lock, NULL);
+	if (ret != 0) {
+		PMD_DRV_LOG(ERR, "Failed to initialize mutex\n");
+		return -1;
+	}
+
+	rte_atomic16_set(&s->enable_intr, 0);
+	s->qtest_socket = qtest_socket;
+	s->ivshmem_socket = ivshmem_socket;
+	hw->qsession = (void *)s;
+
+	ret = pthread_create(&s->event_th, NULL, qtest_event_handler, s);
+	if (ret != 0) {
+		PMD_DRV_LOG(ERR, "Failed to create event handler\n");
+		return -1;
+	}
+
+	ret = pthread_create(&s->intr_th, NULL, qtest_intr_handler, s);
+	if (ret != 0) {
+		PMD_DRV_LOG(ERR, "Failed to create interrupt handler\n");
+		return -1;
+	}
+
+	ret = qtest_intr_initialize(data);
+	if (ret != 0) {
+		PMD_DRV_LOG(ERR, "Failed to initialize interrupt\n");
+		return -1;
+	}
+
+	ret = qtest_setup_shared_memory(s);
+	if (ret != 0) {
+		PMD_DRV_LOG(ERR, "Failed to setup shared memory\n");
+		return -1;
+	}
+
+	ret = qtest_init_pci_devices(s);
+	if (ret != 0) {
+		PMD_DRV_LOG(ERR, "Failed to initialize devices\n");
+		return -1;
+	}
+
+	return 0;
+}
+
+static void
+qtest_remove_target_devices(struct qtest_session *s)
+{
+	struct qtest_pci_device *dev, *next;
+
+	for (dev = TAILQ_FIRST(&s->head); dev != NULL; dev = next) {
+		next = TAILQ_NEXT(dev, next);
+		TAILQ_REMOVE(&s->head, dev, next);
+		free(dev);
+	}
+}
+
+void
+qtest_vdev_uninit(struct rte_eth_dev_data *data)
+{
+	struct virtio_hw *hw = ((struct rte_eth_dev_data *)data)->dev_private;
+	struct qtest_session *s;
+
+	s = (struct qtest_session *)hw->qsession;
+
+	qtest_close_sockets(s);
+
+	pthread_cancel(s->event_th);
+	pthread_join(s->event_th, NULL);
+
+	pthread_cancel(s->intr_th);
+	pthread_join(s->intr_th, NULL);
+
+	pthread_mutex_destroy(&s->qtest_session_lock);
+
+	qtest_remove_target_devices(s);
+
+	rte_free(s);
+}
diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c
index c477b05..e32f1dd 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -36,6 +36,10 @@
 #include <stdio.h>
 #include <errno.h>
 #include <unistd.h>
+#ifdef RTE_LIBRTE_VIRTIO_HOST_MODE
+#include <sys/socket.h>
+#include <sys/un.h>
+#endif
 
 #include <rte_ethdev.h>
 #include <rte_memcpy.h>
@@ -52,6 +56,10 @@
 #include <rte_memory.h>
 #include <rte_eal.h>
 #include <rte_dev.h>
+#ifdef RTE_LIBRTE_VIRTIO_HOST_MODE
+#include <rte_eal_memconfig.h>
+#include <rte_kvargs.h>
+#endif
 
 #include "virtio_ethdev.h"
 #include "virtio_pci.h"
@@ -160,8 +168,7 @@ virtio_send_command(struct virtqueue *vq, struct virtio_pmd_ctrl *ctrl,
 	if ((vq->vq_free_cnt < ((uint32_t)pkt_num + 2)) || (pkt_num < 1))
 		return -1;
 
-	memcpy(vq->virtio_net_hdr_mz->addr, ctrl,
-		sizeof(struct virtio_pmd_ctrl));
+	memcpy(vq->virtio_net_hdr_vaddr, ctrl, sizeof(struct virtio_pmd_ctrl));
 
 	/*
 	 * Format is enforced in qemu code:
@@ -170,14 +177,14 @@ virtio_send_command(struct virtqueue *vq, struct virtio_pmd_ctrl *ctrl,
 	 * One RX packet for ACK.
 	 */
 	vq->vq_ring.desc[head].flags = VRING_DESC_F_NEXT;
-	vq->vq_ring.desc[head].addr = vq->virtio_net_hdr_mz->phys_addr;
+	vq->vq_ring.desc[head].addr = vq->virtio_net_hdr_mem;
 	vq->vq_ring.desc[head].len = sizeof(struct virtio_net_ctrl_hdr);
 	vq->vq_free_cnt--;
 	i = vq->vq_ring.desc[head].next;
 
 	for (k = 0; k < pkt_num; k++) {
 		vq->vq_ring.desc[i].flags = VRING_DESC_F_NEXT;
-		vq->vq_ring.desc[i].addr = vq->virtio_net_hdr_mz->phys_addr
+		vq->vq_ring.desc[i].addr = vq->virtio_net_hdr_mem
 			+ sizeof(struct virtio_net_ctrl_hdr)
 			+ sizeof(ctrl->status) + sizeof(uint8_t)*sum;
 		vq->vq_ring.desc[i].len = dlen[k];
@@ -187,7 +194,7 @@ virtio_send_command(struct virtqueue *vq, struct virtio_pmd_ctrl *ctrl,
 	}
 
 	vq->vq_ring.desc[i].flags = VRING_DESC_F_WRITE;
-	vq->vq_ring.desc[i].addr = vq->virtio_net_hdr_mz->phys_addr
+	vq->vq_ring.desc[i].addr = vq->virtio_net_hdr_mem
 			+ sizeof(struct virtio_net_ctrl_hdr);
 	vq->vq_ring.desc[i].len = sizeof(ctrl->status);
 	vq->vq_free_cnt--;
@@ -232,7 +239,7 @@ virtio_send_command(struct virtqueue *vq, struct virtio_pmd_ctrl *ctrl,
 	PMD_INIT_LOG(DEBUG, "vq->vq_free_cnt=%d\nvq->vq_desc_head_idx=%d",
 			vq->vq_free_cnt, vq->vq_desc_head_idx);
 
-	memcpy(&result, vq->virtio_net_hdr_mz->addr,
+	memcpy(&result, vq->virtio_net_hdr_vaddr,
 			sizeof(struct virtio_pmd_ctrl));
 
 	return result.status;
@@ -270,6 +277,9 @@ virtio_dev_queue_release(struct virtqueue *vq) {
 		hw = vq->hw;
 		hw->vtpci_ops->del_queue(hw, vq);
 
+		rte_memzone_free(vq->virtio_net_hdr_mz);
+		rte_memzone_free(vq->mz);
+
 		rte_free(vq->sw_ring);
 		rte_free(vq);
 	}
@@ -366,66 +376,81 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev,
 		}
 	}
 
-	/*
-	 * Virtio PCI device VIRTIO_PCI_QUEUE_PF register is 32bit,
-	 * and only accepts 32 bit page frame number.
-	 * Check if the allocated physical memory exceeds 16TB.
-	 */
-	if ((mz->phys_addr + vq->vq_ring_size - 1) >> (VIRTIO_PCI_QUEUE_ADDR_SHIFT + 32)) {
-		PMD_INIT_LOG(ERR, "vring address shouldn't be above 16TB!");
-		rte_free(vq);
-		return -ENOMEM;
-	}
-
 	memset(mz->addr, 0, sizeof(mz->len));
 	vq->mz = mz;
-	vq->vq_ring_mem = mz->phys_addr;
 	vq->vq_ring_virt_mem = mz->addr;
-	PMD_INIT_LOG(DEBUG, "vq->vq_ring_mem:      0x%"PRIx64, (uint64_t)mz->phys_addr);
-	PMD_INIT_LOG(DEBUG, "vq->vq_ring_virt_mem: 0x%"PRIx64, (uint64_t)(uintptr_t)mz->addr);
+
+
+	if (dev->dev_type == RTE_ETH_DEV_PCI) {
+		vq->vq_ring_mem = mz->phys_addr;
+
+		/* Virtio PCI device VIRTIO_PCI_QUEUE_PF register is 32bit,
+		 * and only accepts 32 bit page frame number.
+		 * Check if the allocated physical memory exceeds 16TB.
+		 */
+		uint64_t last_physaddr = vq->vq_ring_mem + vq->vq_ring_size - 1;
+		if (last_physaddr >> (VIRTIO_PCI_QUEUE_ADDR_SHIFT + 32)) {
+			PMD_INIT_LOG(ERR, "vring address shouldn't be above 16TB!");
+			rte_free(vq);
+			return -ENOMEM;
+		}
+	}
+#ifdef RTE_LIBRTE_VIRTIO_HOST_MODE
+	else { /* RTE_ETH_DEV_VIRTUAL */
+		/* Use virtual addr to fill!!! */
+		vq->vq_ring_mem = (phys_addr_t)mz->addr;
+
+		/* TODO: check last_physaddr */
+	}
+#endif
+
+	PMD_INIT_LOG(DEBUG, "vq->vq_ring_mem:      0x%"PRIx64,
+			(uint64_t)vq->vq_ring_mem);
+	PMD_INIT_LOG(DEBUG, "vq->vq_ring_virt_mem: 0x%"PRIx64,
+			(uint64_t)(uintptr_t)vq->vq_ring_virt_mem);
+
 	vq->virtio_net_hdr_mz  = NULL;
 	vq->virtio_net_hdr_mem = 0;
 
+	uint64_t hdr_size = 0;
 	if (queue_type == VTNET_TQ) {
 		/*
 		 * For each xmit packet, allocate a virtio_net_hdr
 		 */
 		snprintf(vq_name, sizeof(vq_name), "port%d_tvq%d_hdrzone",
 			dev->data->port_id, queue_idx);
-		vq->virtio_net_hdr_mz = rte_memzone_reserve_aligned(vq_name,
-			vq_size * hw->vtnet_hdr_size,
-			socket_id, 0, RTE_CACHE_LINE_SIZE);
-		if (vq->virtio_net_hdr_mz == NULL) {
-			if (rte_errno == EEXIST)
-				vq->virtio_net_hdr_mz =
-					rte_memzone_lookup(vq_name);
-			if (vq->virtio_net_hdr_mz == NULL) {
-				rte_free(vq);
-				return -ENOMEM;
-			}
-		}
-		vq->virtio_net_hdr_mem =
-			vq->virtio_net_hdr_mz->phys_addr;
-		memset(vq->virtio_net_hdr_mz->addr, 0,
-			vq_size * hw->vtnet_hdr_size);
+		hdr_size = vq_size * hw->vtnet_hdr_size;
 	} else if (queue_type == VTNET_CQ) {
-		/* Allocate a page for control vq command, data and status */
 		snprintf(vq_name, sizeof(vq_name), "port%d_cvq_hdrzone",
 			dev->data->port_id);
-		vq->virtio_net_hdr_mz = rte_memzone_reserve_aligned(vq_name,
-			PAGE_SIZE, socket_id, 0, RTE_CACHE_LINE_SIZE);
-		if (vq->virtio_net_hdr_mz == NULL) {
+		/* Allocate a page for control vq command, data and status */
+		hdr_size = PAGE_SIZE;
+	}
+
+	if (hdr_size) { /* queue_type is VTNET_TQ or VTNET_CQ */
+		mz = rte_memzone_reserve_aligned(vq_name,
+				hdr_size, socket_id, 0, RTE_CACHE_LINE_SIZE);
+		if (mz == NULL) {
 			if (rte_errno == EEXIST)
-				vq->virtio_net_hdr_mz =
-					rte_memzone_lookup(vq_name);
-			if (vq->virtio_net_hdr_mz == NULL) {
+				mz = rte_memzone_lookup(vq_name);
+			if (mz == NULL) {
 				rte_free(vq);
 				return -ENOMEM;
 			}
 		}
-		vq->virtio_net_hdr_mem =
-			vq->virtio_net_hdr_mz->phys_addr;
-		memset(vq->virtio_net_hdr_mz->addr, 0, PAGE_SIZE);
+		vq->virtio_net_hdr_mz = mz;
+		vq->virtio_net_hdr_vaddr = mz->addr;
+		memset(vq->virtio_net_hdr_vaddr, 0, hdr_size);
+
+		if (dev->dev_type == RTE_ETH_DEV_PCI) {
+			vq->virtio_net_hdr_mem = mz->phys_addr;
+		}
+#ifdef RTE_LIBRTE_VIRTIO_HOST_MODE
+		else {
+			/* Use vaddr!!! */
+			vq->virtio_net_hdr_mem = (phys_addr_t)mz->addr;
+		}
+#endif
 	}
 
 	hw->vtpci_ops->setup_queue(hw, vq);
@@ -479,12 +504,18 @@ virtio_dev_close(struct rte_eth_dev *dev)
 	PMD_INIT_LOG(DEBUG, "virtio_dev_close");
 
 	/* reset the NIC */
-	if (pci_dev->driver->drv_flags & RTE_PCI_DRV_INTR_LSC)
+	if (((dev->dev_type == RTE_ETH_DEV_PCI) &&
+			(pci_dev->driver->drv_flags & RTE_PCI_DRV_INTR_LSC)) ||
+			((dev->dev_type == RTE_ETH_DEV_VIRTUAL) &&
+			(dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC))) {
 		vtpci_irq_config(hw, VIRTIO_MSI_NO_VECTOR);
+	}
 	vtpci_reset(hw);
 	hw->started = 0;
-	virtio_dev_free_mbufs(dev);
-	virtio_free_queues(dev);
+	if ((dev->data->rx_queues != NULL) && (dev->data->tx_queues != NULL)) {
+		virtio_dev_free_mbufs(dev);
+		virtio_free_queues(dev);
+	}
 }
 
 static void
@@ -983,14 +1014,30 @@ virtio_interrupt_handler(__rte_unused struct rte_intr_handle *handle,
 	isr = vtpci_isr(hw);
 	PMD_DRV_LOG(INFO, "interrupt status = %#x", isr);
 
-	if (rte_intr_enable(&dev->pci_dev->intr_handle) < 0)
-		PMD_DRV_LOG(ERR, "interrupt enable failed");
-
-	if (isr & VIRTIO_PCI_ISR_CONFIG) {
+	if (dev->dev_type == RTE_ETH_DEV_PCI) {
+		if (rte_intr_enable(&dev->pci_dev->intr_handle) < 0)
+			PMD_DRV_LOG(ERR, "interrupt enable failed");
+		if (isr & VIRTIO_PCI_ISR_CONFIG) {
+			if (virtio_dev_link_update(dev, 0) == 0)
+				_rte_eth_dev_callback_process(dev,
+						RTE_ETH_EVENT_INTR_LSC);
+		}
+	}
+#ifdef RTE_LIBRTE_VIRTIO_HOST_MODE
+	else if (dev->dev_type == RTE_ETH_DEV_VIRTUAL) {
+		if (qtest_intr_enable(dev->data) < 0)
+			PMD_DRV_LOG(ERR, "interrupt enable failed");
+		/*
+		 * If last qtest message is interrupt, 'isr' will be 0
+		 * becasue socket has been closed already.
+		 * But still we want to notice this event to EAL.
+		 * So just ignore isr value.
+		 */
 		if (virtio_dev_link_update(dev, 0) == 0)
 			_rte_eth_dev_callback_process(dev,
-						      RTE_ETH_EVENT_INTR_LSC);
+					RTE_ETH_EVENT_INTR_LSC);
 	}
+#endif
 
 }
 
@@ -1014,7 +1061,8 @@ eth_virtio_dev_init(struct rte_eth_dev *eth_dev)
 	struct virtio_hw *hw = eth_dev->data->dev_private;
 	struct virtio_net_config *config;
 	struct virtio_net_config local_config;
-	struct rte_pci_device *pci_dev;
+	struct rte_pci_device *pci_dev = eth_dev->pci_dev;
+	struct rte_pci_id id;
 
 	RTE_BUILD_BUG_ON(RTE_PKTMBUF_HEADROOM < sizeof(struct virtio_net_hdr));
 
@@ -1052,8 +1100,14 @@ eth_virtio_dev_init(struct rte_eth_dev *eth_dev)
 		return -1;
 
 	/* If host does not support status then disable LSC */
-	if (!vtpci_with_feature(hw, VIRTIO_NET_F_STATUS))
-		pci_dev->driver->drv_flags &= ~RTE_PCI_DRV_INTR_LSC;
+	if (!vtpci_with_feature(hw, VIRTIO_NET_F_STATUS)) {
+		if (eth_dev->dev_type == RTE_ETH_DEV_PCI)
+			pci_dev->driver->drv_flags &= ~RTE_PCI_DRV_INTR_LSC;
+#ifdef RTE_LIBRTE_VIRTIO_HOST_MODE
+		else if (eth_dev->dev_type == RTE_ETH_DEV_VIRTUAL)
+			eth_dev->data->dev_flags &= ~RTE_ETH_DEV_INTR_LSC;
+#endif
+	}
 
 	rte_eth_copy_pci_info(eth_dev, pci_dev);
 
@@ -1132,14 +1186,30 @@ eth_virtio_dev_init(struct rte_eth_dev *eth_dev)
 
 	PMD_INIT_LOG(DEBUG, "hw->max_rx_queues=%d   hw->max_tx_queues=%d",
 			hw->max_rx_queues, hw->max_tx_queues);
+
+	memset(&id, 0, sizeof(id));
+	if (eth_dev->dev_type == RTE_ETH_DEV_PCI)
+		id = pci_dev->id;
+#ifdef RTE_LIBRTE_VIRTIO_HOST_MODE
+	else if (eth_dev->dev_type == RTE_ETH_DEV_VIRTUAL)
+		id = qtest_get_pci_id_of_virtio_net();
+#endif
+
 	PMD_INIT_LOG(DEBUG, "port %d vendorID=0x%x deviceID=0x%x",
-			eth_dev->data->port_id, pci_dev->id.vendor_id,
-			pci_dev->id.device_id);
+			eth_dev->data->port_id,
+			id.vendor_id, id.device_id);
 
 	/* Setup interrupt callback  */
-	if (pci_dev->driver->drv_flags & RTE_PCI_DRV_INTR_LSC)
+	if ((eth_dev->dev_type == RTE_ETH_DEV_PCI) &&
+			(pci_dev->driver->drv_flags & RTE_PCI_DRV_INTR_LSC))
 		rte_intr_callback_register(&pci_dev->intr_handle,
-				   virtio_interrupt_handler, eth_dev);
+				virtio_interrupt_handler, eth_dev);
+#ifdef RTE_LIBRTE_VIRTIO_HOST_MODE
+	else if ((eth_dev->dev_type == RTE_ETH_DEV_VIRTUAL) &&
+			(eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC))
+		qtest_intr_callback_register(eth_dev->data,
+				virtio_interrupt_handler, eth_dev);
+#endif
 
 	virtio_dev_cq_start(eth_dev);
 
@@ -1173,10 +1243,18 @@ eth_virtio_dev_uninit(struct rte_eth_dev *eth_dev)
 	eth_dev->data->mac_addrs = NULL;
 
 	/* reset interrupt callback  */
-	if (pci_dev->driver->drv_flags & RTE_PCI_DRV_INTR_LSC)
+	if ((eth_dev->dev_type == RTE_ETH_DEV_PCI) &&
+			(pci_dev->driver->drv_flags & RTE_PCI_DRV_INTR_LSC))
 		rte_intr_callback_unregister(&pci_dev->intr_handle,
 						virtio_interrupt_handler,
 						eth_dev);
+#ifdef RTE_LIBRTE_VIRTIO_HOST_MODE
+	else if ((eth_dev->dev_type == RTE_ETH_DEV_VIRTUAL) &&
+			(eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC))
+		qtest_intr_callback_unregister(eth_dev->data,
+				virtio_interrupt_handler, eth_dev);
+#endif
+
 	vtpci_uninit(eth_dev, hw);
 
 	PMD_INIT_LOG(DEBUG, "dev_uninit completed");
@@ -1241,11 +1319,15 @@ virtio_dev_configure(struct rte_eth_dev *dev)
 		return -ENOTSUP;
 	}
 
-	if (pci_dev->driver->drv_flags & RTE_PCI_DRV_INTR_LSC)
+	if (((dev->dev_type == RTE_ETH_DEV_PCI) &&
+			(pci_dev->driver->drv_flags & RTE_PCI_DRV_INTR_LSC)) ||
+			((dev->dev_type == RTE_ETH_DEV_VIRTUAL) &&
+			(dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC))) {
 		if (vtpci_irq_config(hw, 0) == VIRTIO_MSI_NO_VECTOR) {
 			PMD_DRV_LOG(ERR, "failed to set config vector");
 			return -EBUSY;
 		}
+	}
 
 	return 0;
 }
@@ -1260,15 +1342,31 @@ virtio_dev_start(struct rte_eth_dev *dev)
 
 	/* check if lsc interrupt feature is enabled */
 	if (dev->data->dev_conf.intr_conf.lsc) {
-		if (!(pci_dev->driver->drv_flags & RTE_PCI_DRV_INTR_LSC)) {
-			PMD_DRV_LOG(ERR, "link status not supported by host");
-			return -ENOTSUP;
-		}
+		if (dev->dev_type == RTE_ETH_DEV_PCI) {
+			if (!(pci_dev->driver->drv_flags & RTE_PCI_DRV_INTR_LSC)) {
+				PMD_DRV_LOG(ERR,
+					"link status not supported by host");
+				return -ENOTSUP;
+			}
 
-		if (rte_intr_enable(&dev->pci_dev->intr_handle) < 0) {
-			PMD_DRV_LOG(ERR, "interrupt enable failed");
-			return -EIO;
+			if (rte_intr_enable(&dev->pci_dev->intr_handle) < 0) {
+				PMD_DRV_LOG(ERR, "interrupt enable failed");
+				return -EIO;
+			}
 		}
+#ifdef RTE_LIBRTE_VIRTIO_HOST_MODE
+		else if (dev->dev_type == RTE_ETH_DEV_VIRTUAL) {
+			if (!(dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)) {
+				PMD_DRV_LOG(ERR,
+					"link status not supported by host");
+				return -ENOTSUP;
+			}
+			if (qtest_intr_enable(dev->data) < 0) {
+				PMD_DRV_LOG(ERR, "interrupt enable failed");
+				return -EIO;
+			}
+		}
+#endif
 	}
 
 	/* Initialize Link state */
@@ -1365,8 +1463,15 @@ virtio_dev_stop(struct rte_eth_dev *dev)
 
 	PMD_INIT_LOG(DEBUG, "stop");
 
-	if (dev->data->dev_conf.intr_conf.lsc)
-		rte_intr_disable(&dev->pci_dev->intr_handle);
+	if (dev->data->dev_conf.intr_conf.lsc) {
+		if (dev->dev_type == RTE_ETH_DEV_PCI)
+			rte_intr_disable(&dev->pci_dev->intr_handle);
+
+#ifdef RTE_LIBRTE_VIRTIO_HOST_MODE
+		if (dev->dev_type == RTE_ETH_DEV_VIRTUAL)
+			qtest_intr_disable(dev->data);
+#endif
+	}
 
 	memset(&link, 0, sizeof(link));
 	virtio_dev_atomic_write_link_status(dev, &link);
@@ -1411,7 +1516,13 @@ virtio_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
 {
 	struct virtio_hw *hw = dev->data->dev_private;
 
-	dev_info->driver_name = dev->driver->pci_drv.name;
+	if (dev->dev_type == RTE_ETH_DEV_PCI)
+		dev_info->driver_name = dev->driver->pci_drv.name;
+#ifdef RTE_LIBRTE_VIRTIO_HOST_MODE
+	else if (dev->dev_type == RTE_ETH_DEV_VIRTUAL)
+		dev_info->driver_name =  dev->data->drv_name;
+#endif
+
 	dev_info->max_rx_queues = (uint16_t)hw->max_rx_queues;
 	dev_info->max_tx_queues = (uint16_t)hw->max_tx_queues;
 	dev_info->min_rx_bufsize = VIRTIO_MIN_RX_BUFSIZE;
@@ -1439,3 +1550,196 @@ static struct rte_driver rte_virtio_driver = {
 };
 
 PMD_REGISTER_DRIVER(rte_virtio_driver);
+
+#ifdef RTE_LIBRTE_VIRTIO_HOST_MODE
+
+#define ETH_VIRTIO_NET_ARG_QTEST_PATH           "qtest"
+#define ETH_VIRTIO_NET_ARG_IVSHMEM_PATH         "ivshmem"
+
+static const char *valid_args[] = {
+	ETH_VIRTIO_NET_ARG_QTEST_PATH,
+	ETH_VIRTIO_NET_ARG_IVSHMEM_PATH,
+	NULL
+};
+
+static int
+get_string_arg(const char *key __rte_unused,
+		const char *value, void *extra_args)
+{
+	int ret, fd, loop = 3;
+	int *pfd = extra_args;
+	struct sockaddr_un sa = {0};
+
+	if ((value == NULL) || (extra_args == NULL))
+		return -EINVAL;
+
+	fd = socket(AF_UNIX, SOCK_STREAM, 0);
+	if (fd < 0)
+		return -1;
+
+	sa.sun_family = AF_UNIX;
+	strncpy(sa.sun_path, value, sizeof(sa.sun_path));
+
+	while (loop--) {
+		/*
+		 * may need to wait for qtest and ivshmem
+		 * sockets are prepared by QEMU.
+		 */
+		ret = connect(fd, (struct sockaddr *)&sa,
+				sizeof(struct sockaddr_un));
+		if (ret == 0)
+			break;
+		else
+			usleep(100000);
+	}
+
+	if (ret != 0) {
+		close(fd);
+		return -1;
+	}
+
+	*pfd = fd;
+
+	return 0;
+}
+
+static struct rte_eth_dev *
+virtio_net_eth_dev_alloc(const char *name)
+{
+	struct rte_eth_dev *eth_dev;
+	struct rte_eth_dev_data *data;
+	struct virtio_hw *hw;
+
+	eth_dev = rte_eth_dev_allocate(name, RTE_ETH_DEV_VIRTUAL);
+	if (eth_dev == NULL)
+		rte_panic("cannot alloc rte_eth_dev\n");
+
+	data = eth_dev->data;
+
+	hw = rte_zmalloc(NULL, sizeof(*hw), 0);
+	if (!hw)
+		rte_panic("malloc virtio_hw failed\n");
+
+	data->dev_private = hw;
+	eth_dev->driver = &rte_virtio_pmd;
+	return eth_dev;
+}
+
+/*
+ * Initialization when "CONFIG_RTE_LIBRTE_VIRTIO_HOST_MODE" is enabled.
+ */
+static int
+rte_virtio_net_pmd_init(const char *name, const char *params)
+{
+	struct rte_kvargs *kvlist = NULL;
+	struct rte_eth_dev *eth_dev = NULL;
+	int ret, qtest_sock, ivshmem_sock;
+	struct rte_mem_config *mcfg;
+
+	if (params == NULL || params[0] == '\0')
+		goto error;
+
+	/* get pointer to global configuration */
+	mcfg = rte_eal_get_configuration()->mem_config;
+
+	/* Check if EAL memory consists of one memory segment */
+	if ((RTE_MAX_MEMSEG > 1) && (mcfg->memseg[1].addr != NULL)) {
+		PMD_INIT_LOG(ERR, "Non contigious memory");
+		goto error;
+	}
+
+	kvlist = rte_kvargs_parse(params, valid_args);
+	if (!kvlist) {
+		PMD_INIT_LOG(ERR, "error when parsing param");
+		goto error;
+	}
+
+	if (rte_kvargs_count(kvlist, ETH_VIRTIO_NET_ARG_IVSHMEM_PATH) == 1) {
+		ret = rte_kvargs_process(kvlist, ETH_VIRTIO_NET_ARG_IVSHMEM_PATH,
+				&get_string_arg, &ivshmem_sock);
+		if (ret != 0) {
+			PMD_INIT_LOG(ERR,
+				"Failed to connect to ivshmem socket");
+			goto error;
+		}
+	} else {
+		PMD_INIT_LOG(ERR, "No argument specified for %s",
+				ETH_VIRTIO_NET_ARG_IVSHMEM_PATH);
+		goto error;
+	}
+
+	if (rte_kvargs_count(kvlist, ETH_VIRTIO_NET_ARG_QTEST_PATH) == 1) {
+		ret = rte_kvargs_process(kvlist, ETH_VIRTIO_NET_ARG_QTEST_PATH,
+				&get_string_arg, &qtest_sock);
+		if (ret != 0) {
+			PMD_INIT_LOG(ERR,
+				"Failed to connect to qtest socket");
+			goto error;
+		}
+	} else {
+		PMD_INIT_LOG(ERR, "No argument specified for %s",
+				ETH_VIRTIO_NET_ARG_QTEST_PATH);
+		goto error;
+	}
+
+	eth_dev = virtio_net_eth_dev_alloc(name);
+
+	qtest_vdev_init(eth_dev->data, qtest_sock, ivshmem_sock);
+
+	/* originally, this will be called in rte_eal_pci_probe() */
+	eth_virtio_dev_init(eth_dev);
+
+	eth_dev->driver = NULL;
+	eth_dev->data->dev_flags |= RTE_ETH_DEV_DETACHABLE;
+	eth_dev->data->kdrv = RTE_KDRV_NONE;
+	eth_dev->data->drv_name = "rte_virtio_pmd";
+
+	rte_kvargs_free(kvlist);
+	return 0;
+
+error:
+	rte_kvargs_free(kvlist);
+	return -EFAULT;
+}
+
+/*
+ * Finalization when "CONFIG_RTE_LIBRTE_VIRTIO_HOST_MODE" is enabled.
+ */
+static int
+rte_virtio_net_pmd_uninit(const char *name)
+{
+	struct rte_eth_dev *eth_dev = NULL;
+	int ret;
+
+	if (name == NULL)
+		return -EINVAL;
+
+	/* find the ethdev entry */
+	eth_dev = rte_eth_dev_allocated(name);
+	if (eth_dev == NULL)
+		return -ENODEV;
+
+	ret = eth_virtio_dev_uninit(eth_dev);
+	if (ret != 0)
+		return -EFAULT;
+
+	qtest_vdev_uninit(eth_dev->data);
+	rte_free(eth_dev->data->dev_private);
+
+	ret = rte_eth_dev_release_port(eth_dev);
+	if (ret != 0)
+		return -EFAULT;
+
+	return 0;
+}
+
+static struct rte_driver rte_virtio_net_driver = {
+	.name   = "eth_virtio_net",
+	.type   = PMD_VDEV,
+	.init   = rte_virtio_net_pmd_init,
+	.uninit = rte_virtio_net_pmd_uninit,
+};
+
+PMD_REGISTER_DRIVER(rte_virtio_net_driver);
+
+#endif /* RTE_LIBRTE_VIRTIO_HOST_MODE */
diff --git a/drivers/net/virtio/virtio_ethdev.h b/drivers/net/virtio/virtio_ethdev.h
index fed9571..81e6465 100644
--- a/drivers/net/virtio/virtio_ethdev.h
+++ b/drivers/net/virtio/virtio_ethdev.h
@@ -123,5 +123,17 @@ uint16_t virtio_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
 #define VTNET_LRO_FEATURES (VIRTIO_NET_F_GUEST_TSO4 | \
 			    VIRTIO_NET_F_GUEST_TSO6 | VIRTIO_NET_F_GUEST_ECN)
 
+#ifdef RTE_LIBRTE_VIRTIO_HOST_MODE
+int qtest_vdev_init(struct rte_eth_dev_data *data,
+		int qtest_socket, int ivshmem_socket);
+void qtest_vdev_uninit(struct rte_eth_dev_data *data);
+void qtest_intr_callback_register(void *data,
+		rte_intr_callback_fn cb, void *cb_arg);
+void qtest_intr_callback_unregister(void *data,
+		rte_intr_callback_fn cb, void *cb_arg);
+int qtest_intr_enable(void *data);
+int qtest_intr_disable(void *data);
+struct rte_pci_id qtest_get_pci_id_of_virtio_net(void);
+#endif /* RTE_LIBRTE_VIRTIO_HOST_MODE */
 
 #endif /* _VIRTIO_ETHDEV_H_ */
diff --git a/drivers/net/virtio/virtio_pci.c b/drivers/net/virtio/virtio_pci.c
index 98eef85..2121234 100644
--- a/drivers/net/virtio/virtio_pci.c
+++ b/drivers/net/virtio/virtio_pci.c
@@ -145,6 +145,98 @@ static const struct virtio_pci_dev_ops phys_modern_dev_ops = {
 	.write32	= phys_modern_write32,
 };
 
+#ifdef RTE_LIBRTE_VIRTIO_HOST_MODE
+static uint8_t
+virt_legacy_read8(struct virtio_hw *hw, uint8_t *addr)
+{
+	return qtest_in(hw, (uint16_t)(hw->io_base + (uint64_t)addr), 'b');
+}
+
+static uint16_t
+virt_legacy_read16(struct virtio_hw *hw, uint16_t *addr)
+{
+	return qtest_in(hw, (uint16_t)(hw->io_base + (uint64_t)addr), 'w');
+}
+
+static uint32_t
+virt_legacy_read32(struct virtio_hw *hw, uint32_t *addr)
+{
+	return qtest_in(hw, (uint16_t)(hw->io_base + (uint64_t)addr), 'l');
+}
+
+static void
+virt_legacy_write8(struct virtio_hw *hw, uint8_t *addr, uint8_t val)
+{
+	qtest_out(hw, (uint16_t)(hw->io_base + (uint64_t)addr), val, 'b');
+}
+
+static void
+virt_legacy_write16(struct virtio_hw *hw, uint16_t *addr, uint16_t val)
+{
+	qtest_out(hw, (uint16_t)(hw->io_base + (uint64_t)addr), val, 'w');
+}
+
+static void
+virt_legacy_write32(struct virtio_hw *hw, uint32_t *addr, uint32_t val)
+{
+	qtest_out(hw, (uint16_t)(hw->io_base + (uint64_t)addr), val, 'l');
+}
+
+static const struct virtio_pci_dev_ops virt_legacy_dev_ops = {
+	.read8		= virt_legacy_read8,
+	.read16		= virt_legacy_read16,
+	.read32		= virt_legacy_read32,
+	.write8		= virt_legacy_write8,
+	.write16	= virt_legacy_write16,
+	.write32	= virt_legacy_write32,
+};
+
+static uint8_t
+virt_modern_read8(struct virtio_hw *hw, uint8_t *addr)
+{
+	return qtest_read(hw, (uint64_t)addr, 'b');
+}
+
+static uint16_t
+virt_modern_read16(struct virtio_hw *hw, uint16_t *addr)
+{
+	return qtest_read(hw, (uint64_t)addr, 'w');
+}
+
+static uint32_t
+virt_modern_read32(struct virtio_hw *hw, uint32_t *addr)
+{
+	return qtest_read(hw, (uint64_t)addr, 'l');
+}
+
+static void
+virt_modern_write8(struct virtio_hw *hw, uint8_t *addr, uint8_t val)
+{
+	qtest_write(hw, (uint64_t)addr, val, 'b');
+}
+
+static void
+virt_modern_write16(struct virtio_hw *hw, uint16_t *addr, uint16_t val)
+{
+	qtest_write(hw, (uint64_t)addr, val, 'w');
+}
+
+static void
+virt_modern_write32(struct virtio_hw *hw, uint32_t *addr, uint32_t val)
+{
+	qtest_write(hw, (uint64_t)addr, val, 'l');
+}
+
+static const struct virtio_pci_dev_ops virt_modern_dev_ops = {
+	.read8		= virt_modern_read8,
+	.read16		= virt_modern_read16,
+	.read32		= virt_modern_read32,
+	.write8		= virt_modern_write8,
+	.write16	= virt_modern_write16,
+	.write32	= virt_modern_write32,
+};
+#endif /* RTE_LIBRTE_VIRTIO_HOST_MODE */
+
 static int
 vtpci_dev_init(struct rte_eth_dev *dev, struct virtio_hw *hw)
 {
@@ -154,6 +246,17 @@ vtpci_dev_init(struct rte_eth_dev *dev, struct virtio_hw *hw)
 		else
 			hw->vtpci_dev_ops = &phys_legacy_dev_ops;
 		return 0;
+	} else if (dev->dev_type == RTE_ETH_DEV_VIRTUAL) {
+#ifdef RTE_LIBRTE_VIRTIO_HOST_MODE
+		if (strncmp(dev->data->name, "eth_virtio_net",
+				strlen("eth_virtio_net")) == 0) {
+			if (hw->modern == 1)
+				hw->vtpci_dev_ops = &virt_modern_dev_ops;
+			else
+				hw->vtpci_dev_ops = &virt_legacy_dev_ops;
+			return 0;
+		}
+#endif
 	}
 
 	PMD_DRV_LOG(ERR, "Unkown virtio-net device.");
@@ -224,12 +327,81 @@ static const struct virtio_pci_cfg_ops phys_cfg_ops = {
 	.read			= phys_read_pci_cfg,
 };
 
+#ifdef RTE_LIBRTE_VIRTIO_HOST_MODE
+static int
+virt_map_pci_cfg(struct virtio_hw *hw __rte_unused)
+{
+	return 0;
+}
+
+static void
+virt_unmap_pci_cfg(struct virtio_hw *hw __rte_unused)
+{
+	return;
+}
+
+static int
+virt_read_pci_cfg(struct virtio_hw *hw, void *buf, size_t len, off_t offset)
+{
+	qtest_read_pci_cfg(hw, "virtio-net", buf, len, offset);
+	return 0;
+}
+
+static void *
+virt_get_mapped_addr(struct virtio_hw *hw, uint8_t bar,
+		     uint32_t offset, uint32_t length)
+{
+	uint64_t base;
+	uint64_t size;
+
+	if (qtest_get_bar_size(hw, "virtio-net", bar, &size) < 0) {
+		PMD_INIT_LOG(ERR, "invalid bar: %u", bar);
+		return NULL;
+	}
+
+	if (offset + length < offset) {
+		PMD_INIT_LOG(ERR, "offset(%u) + lenght(%u) overflows",
+			offset, length);
+		return NULL;
+	}
+
+	if (offset + length > size) {
+		PMD_INIT_LOG(ERR,
+			"invalid cap: overflows bar space: %u > %"PRIu64,
+			offset + length, size);
+		return NULL;
+	}
+
+	if (qtest_get_bar_addr(hw, "virtio-net", bar, &base) < 0) {
+		PMD_INIT_LOG(ERR, "invalid bar: %u", bar);
+		return NULL;
+	}
+
+	return (void *)(base + offset);
+}
+
+static const struct virtio_pci_cfg_ops virt_cfg_ops = {
+	.map			= virt_map_pci_cfg,
+	.unmap			= virt_unmap_pci_cfg,
+	.get_mapped_addr	= virt_get_mapped_addr,
+	.read			= virt_read_pci_cfg,
+};
+#endif /* RTE_LIBRTE_VIRTIO_HOST_MODE */
+
 static int
 vtpci_cfg_init(struct rte_eth_dev *dev, struct virtio_hw *hw)
 {
 	if (dev->dev_type == RTE_ETH_DEV_PCI) {
 		hw->vtpci_cfg_ops = &phys_cfg_ops;
 		return 0;
+	} else if (dev->dev_type == RTE_ETH_DEV_VIRTUAL) {
+#ifdef RTE_LIBRTE_VIRTIO_HOST_MODE
+		if (strncmp(dev->data->name, "eth_virtio_net",
+				strlen("eth_virtio_net")) == 0) {
+			hw->vtpci_cfg_ops = &virt_cfg_ops;
+			return 0;
+		}
+#endif
 	}
 
 	PMD_DRV_LOG(ERR, "Unkown virtio-net device.");
@@ -785,7 +957,7 @@ modern_setup_queue(struct virtio_hw *hw, struct virtqueue *vq)
 	uint64_t desc_addr, avail_addr, used_addr;
 	uint16_t notify_off;
 
-	desc_addr = vq->mz->phys_addr;
+	desc_addr = vq->vq_ring_mem;
 	avail_addr = desc_addr + vq->vq_nentries * sizeof(struct vring_desc);
 	used_addr = RTE_ALIGN_CEIL(avail_addr + offsetof(struct vring_avail,
 							 ring[vq->vq_nentries]),
@@ -1019,6 +1191,14 @@ vtpci_modern_init(struct rte_eth_dev *dev, struct virtio_hw *hw)
 
 	if (dev->dev_type == RTE_ETH_DEV_PCI)
 		pci_dev->driver->drv_flags |= RTE_PCI_DRV_INTR_LSC;
+	else if (dev->dev_type == RTE_ETH_DEV_VIRTUAL) {
+#ifdef RTE_LIBRTE_VIRTIO_HOST_MODE
+		if (strncmp(dev->data->name, "eth_virtio_net",
+				strlen("eth_virtio_net")) == 0) {
+			dev->data->dev_flags |= RTE_ETH_DEV_INTR_LSC;
+		}
+#endif
+	}
 
 	hw->vtpci_ops = &modern_ops;
 	hw->modern = 1;
@@ -1037,6 +1217,14 @@ vtpci_legacy_init(struct rte_eth_dev *dev, struct virtio_hw *hw)
 			return -1;
 
 		hw->use_msix = legacy_virtio_has_msix(&pci_dev->addr);
+	} else if (dev->dev_type == RTE_ETH_DEV_VIRTUAL) {
+#ifdef RTE_LIBRTE_VIRTIO_HOST_MODE
+		if (strncmp(dev->data->name, "eth_virtio_net",
+					strlen("eth_virtio_net")) == 0) {
+			hw->use_msix = 0;
+			dev->data->dev_flags |= RTE_ETH_DEV_INTR_LSC;
+		}
+#endif
 	}
 
 	hw->io_base = (uint32_t)(uintptr_t)
diff --git a/drivers/net/virtio/virtio_pci.h b/drivers/net/virtio/virtio_pci.h
index 7b5ad54..cdc23b5 100644
--- a/drivers/net/virtio/virtio_pci.h
+++ b/drivers/net/virtio/virtio_pci.h
@@ -267,6 +267,9 @@ struct virtio_net_config;
 
 struct virtio_hw {
 	struct virtqueue *cvq;
+#ifdef RTE_LIBRTE_VIRTIO_HOST_MODE
+	void        *qsession;
+#endif
 	uint32_t    io_base;
 	uint64_t    guest_features;
 	uint32_t    max_tx_queues;
@@ -366,4 +369,17 @@ uint8_t vtpci_isr(struct virtio_hw *);
 
 uint16_t vtpci_irq_config(struct virtio_hw *, uint16_t);
 
+#ifdef RTE_LIBRTE_VIRTIO_HOST_MODE
+uint32_t qtest_in(struct virtio_hw *, uint16_t, char type);
+void qtest_out(struct virtio_hw *, uint16_t, uint64_t, char type);
+uint32_t qtest_read(struct virtio_hw *, uint64_t, char type);
+void qtest_write(struct virtio_hw *, uint64_t, uint64_t, char type);
+int qtest_read_pci_cfg(struct virtio_hw *hw, const char *name,
+		void *buf, size_t len, off_t offset);
+int qtest_get_bar_addr(struct virtio_hw *hw, const char *name,
+		uint8_t bar, uint64_t *addr);
+int qtest_get_bar_size(struct virtio_hw *hw, const char *name,
+		uint8_t bar, uint64_t *size);
+#endif
+
 #endif /* _VIRTIO_PCI_H_ */
diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c
index 41a1366..f842c79 100644
--- a/drivers/net/virtio/virtio_rxtx.c
+++ b/drivers/net/virtio/virtio_rxtx.c
@@ -191,8 +191,7 @@ virtqueue_enqueue_recv_refill(struct virtqueue *vq, struct rte_mbuf *cookie)
 
 	start_dp = vq->vq_ring.desc;
 	start_dp[idx].addr =
-		(uint64_t)(cookie->buf_physaddr + RTE_PKTMBUF_HEADROOM
-		- hw->vtnet_hdr_size);
+		RTE_MBUF_DATA_DMA_ADDR(cookie) - hw->vtnet_hdr_size;
 	start_dp[idx].len =
 		cookie->buf_len - RTE_PKTMBUF_HEADROOM + hw->vtnet_hdr_size;
 	start_dp[idx].flags =  VRING_DESC_F_WRITE;
diff --git a/drivers/net/virtio/virtqueue.h b/drivers/net/virtio/virtqueue.h
index 99d4fa9..b772e04 100644
--- a/drivers/net/virtio/virtqueue.h
+++ b/drivers/net/virtio/virtqueue.h
@@ -66,8 +66,13 @@ struct rte_mbuf;
 
 #define VIRTQUEUE_MAX_NAME_SZ 32
 
+#ifdef RTE_LIBRTE_VIRTIO_HOST_MODE
+#define RTE_MBUF_DATA_DMA_ADDR(mb) \
+	((uint64_t)(mb)->buf_addr + (mb)->data_off)
+#else
 #define RTE_MBUF_DATA_DMA_ADDR(mb) \
 	(uint64_t) ((mb)->buf_physaddr + (mb)->data_off)
+#endif
 
 #define VTNET_SQ_RQ_QUEUE_IDX 0
 #define VTNET_SQ_TQ_QUEUE_IDX 1
@@ -167,7 +172,8 @@ struct virtqueue {
 
 	void        *vq_ring_virt_mem;    /**< linear address of vring*/
 	unsigned int vq_ring_size;
-	phys_addr_t vq_ring_mem;          /**< physical address of vring */
+	phys_addr_t vq_ring_mem;          /**< physical address of vring for non-vdev,
+						virtual address of vring for vdev */
 
 	struct vring vq_ring;    /**< vring keeping desc, used and avail */
 	uint16_t    vq_free_cnt; /**< num of desc available */
@@ -188,6 +194,7 @@ struct virtqueue {
 	uint16_t vq_avail_idx;
 	uint64_t mbuf_initializer; /**< value to init mbufs. */
 	phys_addr_t virtio_net_hdr_mem; /**< hdr for each xmit packet */
+	void	*virtio_net_hdr_vaddr;	/**< linear address of vring */
 
 	struct rte_mbuf **sw_ring; /**< RX software ring. */
 	/* dummy mbuf, for wraparound when processing RX ring. */
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 120+ messages in thread

* Re: [RFC PATCH 0/5] virtio: Add a new layer to abstract pci access method
  2016-01-21 11:07   ` [RFC PATCH 0/5] virtio: Add a new layer to abstract pci access method Tetsuya Mukawa
@ 2016-01-21 11:10     ` Tetsuya Mukawa
  0 siblings, 0 replies; 120+ messages in thread
From: Tetsuya Mukawa @ 2016-01-21 11:10 UTC (permalink / raw)
  To: dev, yuanhan.liu, jianfeng.tan

On 2016/01/21 20:07, Tetsuya Mukawa wrote:
> This patch series are not for upstreaming.
>
> It describe how to use a new access method abstraction of "virtio-pci.c".
> Because of this, some patches are not for upstreaming.
>
> For example, below changes will be shared with Jianfeng's patches.
> So these changes are just temporary.
>  - "--shm" option to allocate EAL memory.
>  - Some changes to access to EAL memory by virtual address.
>
> Anyway, some changes are not for upstreaming, but virtual virtio-net PMD
> should work with QEMU as described in commit log.
>
> Tetsuya Mukawa (5):
>   virtio: Change the parameter order of io_write8/16/32()
>   virtio: move rte_eal_pci_unmap_device() to virtio_pci.c
>   virtio: Add a new layer to abstract pci access method
>   EAL: Add new EAL "--shm" option.
>   virtio: Extend virtio-net PMD to support container environment
>
>  config/common_linuxapp                     |    1 +
>  drivers/net/virtio/Makefile                |    4 +
>  drivers/net/virtio/qtest.c                 | 1237 ++++++++++++++++++++++++++++
>  drivers/net/virtio/virtio_ethdev.c         |  454 ++++++++--
>  drivers/net/virtio/virtio_ethdev.h         |   12 +
>  drivers/net/virtio/virtio_pci.c            |  732 ++++++++++++----
>  drivers/net/virtio/virtio_pci.h            |   39 +-
>  drivers/net/virtio/virtio_rxtx.c           |    3 +-
>  drivers/net/virtio/virtqueue.h             |    9 +-
>  lib/librte_eal/common/eal_common_options.c |    5 +
>  lib/librte_eal/common/eal_internal_cfg.h   |    1 +
>  lib/librte_eal/common/eal_options.h        |    2 +
>  lib/librte_eal/common/include/rte_memory.h |    5 +
>  lib/librte_eal/linuxapp/eal/eal_memory.c   |   76 ++
>  14 files changed, 2337 insertions(+), 243 deletions(-)
>  create mode 100644 drivers/net/virtio/qtest.c
>

Hi Yuanhan and Jianfeng,

Here is example how to use this new abstraction.
Please check first 3 patches to know how to implement the abstraction.

Also please see changes for "virtio_pci.c" involved in last patch.
This is the example.

Thanks,
Tetsuya

^ permalink raw reply	[flat|nested] 120+ messages in thread

* Re: [RFC PATCH 4/5] EAL: Add new EAL "--shm" option.
  2016-01-21 11:07   ` [RFC PATCH 4/5] EAL: Add new EAL "--shm" option Tetsuya Mukawa
@ 2016-01-22  1:43     ` Tan, Jianfeng
  2016-01-22  2:07       ` Tan, Jianfeng
  0 siblings, 1 reply; 120+ messages in thread
From: Tan, Jianfeng @ 2016-01-22  1:43 UTC (permalink / raw)
  To: Tetsuya Mukawa, dev, yuanhan.liu

Hi Tetsuya,

On 1/21/2016 7:07 PM, Tetsuya Mukawa wrote:
> This is a temporary patch to get EAL memory under 16T(1 << 44).
>
> The patch adds new EAL "--shm" option. If the option is specified,
> EAL will allocate one file from hugetlbfs. This memory is for sharing
> memory between DPDK applicaiton and QEMU ivhsmem device.
>
> Signed-off-by: Tetsuya Mukawa <mukawa@igel.co.jp>
> ---
>   lib/librte_eal/common/eal_common_options.c |  5 ++
>   lib/librte_eal/common/eal_internal_cfg.h   |  1 +
>   lib/librte_eal/common/eal_options.h        |  2 +
>   lib/librte_eal/common/include/rte_memory.h |  5 ++
>   lib/librte_eal/linuxapp/eal/eal_memory.c   | 76 ++++++++++++++++++++++++++++++
>   5 files changed, 89 insertions(+)
>
...
>   }
>   
> +int
> +rte_memseg_info_get(int index, int *pfd, uint64_t *psize, void **paddr)
> +{
> +	struct rte_mem_config *mcfg;
> +	mcfg = rte_eal_get_configuration()->mem_config;
> +
> +	if (pfd != NULL)
> +		*pfd = mcfg->memseg[index].fd;
> +	if (psize != NULL)
> +		*psize = (uint64_t)mcfg->memseg[index].len;
> +	if (paddr != NULL)
> +		*paddr = (void *)(uint64_t)mcfg->memseg[index].addr;
> +	return 0;
> +}

In my patch, I introduce another API to get memseg info. In my mind, no 
reason to keep those FDs open. How do you think?

> +
>   /*
>    * Get physical address of any mapped virtual address in the current process.
>    */
> @@ -1075,6 +1090,46 @@ calc_num_pages_per_socket(uint64_t * memory,
>   	return total_num_pages;
>   }
>   
> +static void *
> +rte_eal_shm_create(int *pfd, const char *hugedir)
> +{
> +	int ret, fd;
> +	char filepath[256];
> +	void *vaddr;
> +	uint64_t size = internal_config.memory;
> +
> +	sprintf(filepath, "%s/%s_cvio", hugedir,
> +			internal_config.hugefile_prefix);
> +
> +	fd = open(filepath, O_CREAT | O_RDWR, 0600);
> +	if (fd < 0)
> +		rte_panic("open %s failed: %s\n", filepath, strerror(errno));
> +
> +	ret = flock(fd, LOCK_EX);
> +	if (ret < 0) {
> +		close(fd);
> +		rte_panic("flock %s failed: %s\n", filepath, strerror(errno));
> +	}
> +
> +	ret = ftruncate(fd, size);
> +	if (ret < 0)
> +		rte_panic("ftruncate failed: %s\n", strerror(errno));
> +
> +	/*
> +	 * Here, we need to map under (1 << 44).
> +	 * This is temporary implementation.
> +	 */
> +	vaddr = mmap((void *)(1ULL << 43), size, PROT_READ | PROT_WRITE,
> +			MAP_SHARED | MAP_FIXED, fd, 0);
> +	if (vaddr != MAP_FAILED) {
> +		memset(vaddr, 0, size);
> +		*pfd = fd;
> +	}

I'm not sure if hard-coded way is good enough. It's known that kernel 
manages VMAs using red-black tree, but I don't know if kernel allocates 
VMA from low address to high address (if yes, can we leverage this 
feature?).

> +	memset(vaddr, 0, size);
> +
> +	return vaddr;
> +}
> +
>   /*
>    * Prepare physical memory mapping: fill configuration structure with
>    * these infos, return 0 on success.
> @@ -1127,6 +1182,27 @@ rte_eal_hugepage_init(void)
>   		return 0;
>   	}
>   
> +	/* create shared memory consist of only one file */
> +	if (internal_config.shm) {
> +		int fd;
> +		struct hugepage_info *hpi;
> +
> +		hpi = &internal_config.hugepage_info[0];
> +		addr = rte_eal_shm_create(&fd, hpi->hugedir);
> +		if (addr == MAP_FAILED) {
> +			RTE_LOG(ERR, EAL, "%s: mmap() failed: %s\n", __func__,
> +					strerror(errno));
> +			return -1;
> +		}
> +		mcfg->memseg[0].phys_addr = rte_mem_virt2phy(addr);
> +		mcfg->memseg[0].addr = addr;
> +		mcfg->memseg[0].hugepage_sz = hpi->hugepage_sz;
> +		mcfg->memseg[0].len = internal_config.memory;
> +		mcfg->memseg[0].socket_id = 0;

As pointed out in my patchset, hard-coded socket_id into 0 may lead to 
failure. Do you have any better idea?

Thanks,
Jianfeng

^ permalink raw reply	[flat|nested] 120+ messages in thread

* Re: [RFC PATCH 4/5] EAL: Add new EAL "--shm" option.
  2016-01-22  1:43     ` Tan, Jianfeng
@ 2016-01-22  2:07       ` Tan, Jianfeng
  2016-01-22  3:23         ` Tetsuya Mukawa
  0 siblings, 1 reply; 120+ messages in thread
From: Tan, Jianfeng @ 2016-01-22  2:07 UTC (permalink / raw)
  To: Tetsuya Mukawa, dev, yuanhan.liu

Hi Tetsuya,

On 1/22/2016 9:43 AM, Tan, Jianfeng wrote:
> Hi Tetsuya,
>
> On 1/21/2016 7:07 PM, Tetsuya Mukawa wrote:
>> This is a temporary patch to get EAL memory under 16T(1 << 44).
>>
>> The patch adds new EAL "--shm" option. If the option is specified,
>> EAL will allocate one file from hugetlbfs. This memory is for sharing
>> memory between DPDK applicaiton and QEMU ivhsmem device.
>>
>> Signed-off-by: Tetsuya Mukawa <mukawa@igel.co.jp>
>> ---
>>   lib/librte_eal/common/eal_common_options.c |  5 ++
>>   lib/librte_eal/common/eal_internal_cfg.h   |  1 +
>>   lib/librte_eal/common/eal_options.h        |  2 +
>>   lib/librte_eal/common/include/rte_memory.h |  5 ++
>>   lib/librte_eal/linuxapp/eal/eal_memory.c   | 76 
>> ++++++++++++++++++++++++++++++
>>   5 files changed, 89 insertions(+)
>>
>
...
>> +    vaddr = mmap((void *)(1ULL << 43), size, PROT_READ | PROT_WRITE,
>> +            MAP_SHARED | MAP_FIXED, fd, 0);
>> +    if (vaddr != MAP_FAILED) {
>> +        memset(vaddr, 0, size);
>> +        *pfd = fd;
>> +    }
>
> I'm not sure if hard-coded way is good enough. It's known that kernel 
> manages VMAs using red-black tree, but I don't know if kernel 
> allocates VMA from low address to high address (if yes, can we 
> leverage this feature?).
>

A little more:it seems that kernel uses arch_get_unmapped_area_topdown() 
-> unmapped_area_topdown() to do that, which starts at 
mm->highest_vm_end. If this value bigger than (1ULL << 44)?

Thanks,
Jianfeng

^ permalink raw reply	[flat|nested] 120+ messages in thread

* Re: [RFC PATCH 4/5] EAL: Add new EAL "--shm" option.
  2016-01-22  2:07       ` Tan, Jianfeng
@ 2016-01-22  3:23         ` Tetsuya Mukawa
  0 siblings, 0 replies; 120+ messages in thread
From: Tetsuya Mukawa @ 2016-01-22  3:23 UTC (permalink / raw)
  To: Tan, Jianfeng, dev

On 2016/01/22 11:07, Tan, Jianfeng wrote:
> Hi Tetsuya,
>
> On 1/22/2016 9:43 AM, Tan, Jianfeng wrote:
>> Hi Tetsuya,
>>
>> On 1/21/2016 7:07 PM, Tetsuya Mukawa wrote:
>>> This is a temporary patch to get EAL memory under 16T(1 << 44).
>>>
>>> The patch adds new EAL "--shm" option. If the option is specified,
>>> EAL will allocate one file from hugetlbfs. This memory is for sharing
>>> memory between DPDK applicaiton and QEMU ivhsmem device.
>>>
>>> Signed-off-by: Tetsuya Mukawa <mukawa@igel.co.jp>
>>> ---
>>>   lib/librte_eal/common/eal_common_options.c |  5 ++
>>>   lib/librte_eal/common/eal_internal_cfg.h   |  1 +
>>>   lib/librte_eal/common/eal_options.h        |  2 +
>>>   lib/librte_eal/common/include/rte_memory.h |  5 ++
>>>   lib/librte_eal/linuxapp/eal/eal_memory.c   | 76
>>> ++++++++++++++++++++++++++++++
>>>   5 files changed, 89 insertions(+)
>>>
>>
> ...
>>> +    vaddr = mmap((void *)(1ULL << 43), size, PROT_READ | PROT_WRITE,
>>> +            MAP_SHARED | MAP_FIXED, fd, 0);
>>> +    if (vaddr != MAP_FAILED) {
>>> +        memset(vaddr, 0, size);
>>> +        *pfd = fd;
>>> +    }
>>
>> I'm not sure if hard-coded way is good enough. It's known that kernel
>> manages VMAs using red-black tree, but I don't know if kernel
>> allocates VMA from low address to high address (if yes, can we
>> leverage this feature?).
>>
>

Hi Jianfeng,

Thanks for comments.
Yes, this "--shm" patch is totally crap, and this patch series is just
for describing how to implement an access method abstraction layer and
how to use it. Main purpose of this patch series is this.

But without this "--shm" patch, if someone wants to test my patch, they
cannot do it.
Because of this, I just involved this patch.
I guess you have already implemented almost same feature in EAL, I will
use it when I submit my container patch.
If we can agree how to implement the access method abstraction layer, I
will submit my container patch separately.

> A little more:it seems that kernel uses
> arch_get_unmapped_area_topdown() -> unmapped_area_topdown() to do
> that, which starts at mm->highest_vm_end. If this value bigger than
> (1ULL << 44)?
>

Yes, I specified MAP_FIXED as mmap parameter.
Then arch_get_unmapped_area_topdown() will not go "unmapped_area_topdown()".
Anyway, we will need to check /proc/self/maps to determine where we can
mmap the memory.

Thanks,
Tetsuya

^ permalink raw reply	[flat|nested] 120+ messages in thread

* Re: [RFC PATCH 3/5] virtio: Add a new layer to abstract pci access method
  2016-01-21 11:07   ` [RFC PATCH 3/5] virtio: Add a new layer to abstract pci access method Tetsuya Mukawa
@ 2016-01-22  7:26     ` Xie, Huawei
  2016-01-22  7:35       ` Tetsuya Mukawa
  0 siblings, 1 reply; 120+ messages in thread
From: Xie, Huawei @ 2016-01-22  7:26 UTC (permalink / raw)
  To: Tetsuya Mukawa, dev, yuanhan.liu, Tan, Jianfeng

On 1/21/2016 7:08 PM, Tetsuya Mukawa wrote:
> +static void
> +phys_legacy_write16(struct virtio_hw *hw, uint16_t *addr, uint16_t val)
> +{
> +	return outb_p((unsigned short)val,
> +			(unsigned short)(hw->io_base + (uint64_t)addr));

outb_p -> outw_p

> +}
> +
> +static void
> +phys_legacy_write32(struct virtio_hw *hw, uint32_t *addr, uint32_t val)
> +{
> +	return outb_p((unsigned int)val,
> +			(unsigned short)(hw->io_base + (uint64_t)addr));

outb_p -> outl_p

> +}
> +


^ permalink raw reply	[flat|nested] 120+ messages in thread

* Re: [RFC PATCH 3/5] virtio: Add a new layer to abstract pci access method
  2016-01-22  7:26     ` Xie, Huawei
@ 2016-01-22  7:35       ` Tetsuya Mukawa
  0 siblings, 0 replies; 120+ messages in thread
From: Tetsuya Mukawa @ 2016-01-22  7:35 UTC (permalink / raw)
  To: Xie, Huawei, dev, yuanhan.liu, Tan, Jianfeng

On 2016/01/22 16:26, Xie, Huawei wrote:
> On 1/21/2016 7:08 PM, Tetsuya Mukawa wrote:
>> +static void
>> +phys_legacy_write16(struct virtio_hw *hw, uint16_t *addr, uint16_t val)
>> +{
>> +	return outb_p((unsigned short)val,
>> +			(unsigned short)(hw->io_base + (uint64_t)addr));
> outb_p -> outw_p
>
>> +}
>> +
>> +static void
>> +phys_legacy_write32(struct virtio_hw *hw, uint32_t *addr, uint32_t val)
>> +{
>> +	return outb_p((unsigned int)val,
>> +			(unsigned short)(hw->io_base + (uint64_t)addr));
> outb_p -> outl_p
>

Oops, thanks!

Tetsuya

^ permalink raw reply	[flat|nested] 120+ messages in thread

* Re: [RFC PATCH 5/5] virtio: Extend virtio-net PMD to support container environment
  2016-01-21 11:07   ` [RFC PATCH 5/5] virtio: Extend virtio-net PMD to support container environment Tetsuya Mukawa
@ 2016-01-22  8:14     ` Xie, Huawei
  2016-01-22 10:37       ` Tetsuya Mukawa
  2016-01-25 10:17     ` Xie, Huawei
                       ` (11 subsequent siblings)
  12 siblings, 1 reply; 120+ messages in thread
From: Xie, Huawei @ 2016-01-22  8:14 UTC (permalink / raw)
  To: Tetsuya Mukawa, dev, yuanhan.liu, Tan, Jianfeng

On 1/21/2016 7:09 PM, Tetsuya Mukawa wrote:
> virtio: Extend virtio-net PMD to support container environment
>
> The patch adds a new virtio-net PMD configuration that allows the PMD to
> work on host as if the PMD is in VM.
> Here is new configuration for virtio-net PMD.
>  - CONFIG_RTE_LIBRTE_VIRTIO_HOST_MODE
> To use this mode, EAL needs physically contiguous memory. To allocate
> such memory, add "--shm" option to application command line.
>
> To prepare virtio-net device on host, the users need to invoke QEMU
> process in special qtest mode. This mode is mainly used for testing QEMU
> devices from outer process. In this mode, no guest runs.
> Here is QEMU command line.
>
>  $ qemu-system-x86_64 \
>              -machine pc-i440fx-1.4,accel=qtest \
>              -display none -qtest-log /dev/null \
>              -qtest unix:/tmp/socket,server \
>              -netdev type=tap,script=/etc/qemu-ifup,id=net0,queues=1\
>              -device virtio-net-pci,netdev=net0,mq=on \
>              -chardev socket,id=chr1,path=/tmp/ivshmem,server \
>              -device ivshmem,size=1G,chardev=chr1,vectors=1
>
>  * QEMU process is needed per port.

Does qtest supports hot plug virtio-net pci device, so that we could run
one QEMU process in host, which provisions the virtio-net virtual
devices for the container?

^ permalink raw reply	[flat|nested] 120+ messages in thread

* Re: [RFC PATCH 5/5] virtio: Extend virtio-net PMD to support container environment
  2016-01-22  8:14     ` Xie, Huawei
@ 2016-01-22 10:37       ` Tetsuya Mukawa
  2016-01-25 10:15         ` Xie, Huawei
  0 siblings, 1 reply; 120+ messages in thread
From: Tetsuya Mukawa @ 2016-01-22 10:37 UTC (permalink / raw)
  To: Xie, Huawei, dev, yuanhan.liu, Tan, Jianfeng

On 2016/01/22 17:14, Xie, Huawei wrote:
> On 1/21/2016 7:09 PM, Tetsuya Mukawa wrote:
>> virtio: Extend virtio-net PMD to support container environment
>>
>> The patch adds a new virtio-net PMD configuration that allows the PMD to
>> work on host as if the PMD is in VM.
>> Here is new configuration for virtio-net PMD.
>>  - CONFIG_RTE_LIBRTE_VIRTIO_HOST_MODE
>> To use this mode, EAL needs physically contiguous memory. To allocate
>> such memory, add "--shm" option to application command line.
>>
>> To prepare virtio-net device on host, the users need to invoke QEMU
>> process in special qtest mode. This mode is mainly used for testing QEMU
>> devices from outer process. In this mode, no guest runs.
>> Here is QEMU command line.
>>
>>  $ qemu-system-x86_64 \
>>              -machine pc-i440fx-1.4,accel=qtest \
>>              -display none -qtest-log /dev/null \
>>              -qtest unix:/tmp/socket,server \
>>              -netdev type=tap,script=/etc/qemu-ifup,id=net0,queues=1\
>>              -device virtio-net-pci,netdev=net0,mq=on \
>>              -chardev socket,id=chr1,path=/tmp/ivshmem,server \
>>              -device ivshmem,size=1G,chardev=chr1,vectors=1
>>
>>  * QEMU process is needed per port.
> Does qtest supports hot plug virtio-net pci device, so that we could run
> one QEMU process in host, which provisions the virtio-net virtual
> devices for the container?

Theoretically, we can use hot plug in some cases.
But I guess we have 3 concerns here.

1. Security.
If we share QEMU process between multiple DPDK applications, this QEMU
process will have all fds of  the applications on different containers.
In some cases, it will be security concern.
So, I guess we need to support current 1:1 configuration at least.

2. shared memory.
Currently, QEMU and DPDK application will map shared memory using same
virtual address.
So if multiple DPDK application connects to one QEMU process, each DPDK
application should have different address for shared memory. I guess
this will be a big limitation.

3. PCI bridge.
So far, QEMU has one PCI bridge, so we can connect almost 10 PCI devices
to QEMU.
(I forget correct number, but it's almost 10, because some slots are
reserved by QEMU)
A DPDK application needs both virtio-net and ivshmem device, so I guess
almost 5 DPDK applications can connect to one QEMU process, so far.
To add more PCI bridges solves this.
But we need to add a lot of implementation to support cascaded PCI
bridges and PCI devices.
(Also we need to solve above "2nd" concern.)

Anyway, if we use virtio-net PMD and vhost-user PMD, QEMU process will
not do anything after initialization.
(QEMU will try to read a qtest socket, then be stopped because there is
no message after initialization)
So I guess we can ignore overhead of these QEMU processes.
If someone cannot ignore it, I guess this is the one of cases that it's
nice to use your light weight container implementation.

Thanks,
Tetsuya

^ permalink raw reply	[flat|nested] 120+ messages in thread

* Re: [RFC PATCH 5/5] virtio: Extend virtio-net PMD to support container environment
  2016-01-22 10:37       ` Tetsuya Mukawa
@ 2016-01-25 10:15         ` Xie, Huawei
  2016-01-26  2:58           ` Tetsuya Mukawa
  0 siblings, 1 reply; 120+ messages in thread
From: Xie, Huawei @ 2016-01-25 10:15 UTC (permalink / raw)
  To: Tetsuya Mukawa, dev, yuanhan.liu, Tan, Jianfeng

On 1/22/2016 6:38 PM, Tetsuya Mukawa wrote:
> On 2016/01/22 17:14, Xie, Huawei wrote:
>> On 1/21/2016 7:09 PM, Tetsuya Mukawa wrote:
>>> virtio: Extend virtio-net PMD to support container environment
>>>
>>> The patch adds a new virtio-net PMD configuration that allows the PMD to
>>> work on host as if the PMD is in VM.
>>> Here is new configuration for virtio-net PMD.
>>>  - CONFIG_RTE_LIBRTE_VIRTIO_HOST_MODE
>>> To use this mode, EAL needs physically contiguous memory. To allocate
>>> such memory, add "--shm" option to application command line.
>>>
>>> To prepare virtio-net device on host, the users need to invoke QEMU
>>> process in special qtest mode. This mode is mainly used for testing QEMU
>>> devices from outer process. In this mode, no guest runs.
>>> Here is QEMU command line.
>>>
>>>  $ qemu-system-x86_64 \
>>>              -machine pc-i440fx-1.4,accel=qtest \
>>>              -display none -qtest-log /dev/null \
>>>              -qtest unix:/tmp/socket,server \
>>>              -netdev type=tap,script=/etc/qemu-ifup,id=net0,queues=1\
>>>              -device virtio-net-pci,netdev=net0,mq=on \
>>>              -chardev socket,id=chr1,path=/tmp/ivshmem,server \
>>>              -device ivshmem,size=1G,chardev=chr1,vectors=1
>>>
>>>  * QEMU process is needed per port.
>> Does qtest supports hot plug virtio-net pci device, so that we could run
>> one QEMU process in host, which provisions the virtio-net virtual
>> devices for the container?
> Theoretically, we can use hot plug in some cases.
> But I guess we have 3 concerns here.
>
> 1. Security.
> If we share QEMU process between multiple DPDK applications, this QEMU
> process will have all fds of  the applications on different containers.
> In some cases, it will be security concern.
> So, I guess we need to support current 1:1 configuration at least.
>
> 2. shared memory.
> Currently, QEMU and DPDK application will map shared memory using same
> virtual address.
> So if multiple DPDK application connects to one QEMU process, each DPDK
> application should have different address for shared memory. I guess
> this will be a big limitation.
>
> 3. PCI bridge.
> So far, QEMU has one PCI bridge, so we can connect almost 10 PCI devices
> to QEMU.
> (I forget correct number, but it's almost 10, because some slots are
> reserved by QEMU)
> A DPDK application needs both virtio-net and ivshmem device, so I guess
> almost 5 DPDK applications can connect to one QEMU process, so far.
> To add more PCI bridges solves this.
> But we need to add a lot of implementation to support cascaded PCI
> bridges and PCI devices.
> (Also we need to solve above "2nd" concern.)
>
> Anyway, if we use virtio-net PMD and vhost-user PMD, QEMU process will
> not do anything after initialization.
> (QEMU will try to read a qtest socket, then be stopped because there is
> no message after initialization)
> So I guess we can ignore overhead of these QEMU processes.
> If someone cannot ignore it, I guess this is the one of cases that it's
> nice to use your light weight container implementation.

Thanks for the explanation, and also in your opinion where is the best
place to run the QEMU instance? If we run QEMU instances in host, for
vhost-kernel support, we could get rid of the root privilege issue.

Another issue is do you plan to support multiple virtio devices in
container? Currently i find the code assuming only one virtio-net device
in QEMU, right?

Btw, i have read most of your qtest code. No obvious issues found so far
but quite a couple of nits. You must have spent a lot of time on this.
It is great work!

> Thanks,
> Tetsuya
>


^ permalink raw reply	[flat|nested] 120+ messages in thread

* Re: [RFC PATCH 5/5] virtio: Extend virtio-net PMD to support container environment
  2016-01-21 11:07   ` [RFC PATCH 5/5] virtio: Extend virtio-net PMD to support container environment Tetsuya Mukawa
  2016-01-22  8:14     ` Xie, Huawei
@ 2016-01-25 10:17     ` Xie, Huawei
  2016-01-26  2:58       ` Tetsuya Mukawa
  2016-01-25 10:29     ` Xie, Huawei
                       ` (10 subsequent siblings)
  12 siblings, 1 reply; 120+ messages in thread
From: Xie, Huawei @ 2016-01-25 10:17 UTC (permalink / raw)
  To: Tetsuya Mukawa, dev, yuanhan.liu, Tan, Jianfeng

On 1/21/2016 7:09 PM, Tetsuya Mukawa wrote:
> +static void
> +qtest_handle_one_message(struct qtest_session *s, char *buf)
> +{
> +	int ret;
> +
> +	if (strncmp(buf, interrupt_message, strlen(interrupt_message)) == 0) {
> +		if (rte_atomic16_read(&s->enable_intr) == 0)
> +			return;
> +
> +		/* relay interrupt to pipe */
> +		ret = write(s->irqfds.writefd, "1", 1);

How about the interrupt latency? Seems it is quite long.

> +		if (ret < 0)
> +			rte_panic("cannot relay interrupt\n");
> +	} else {
> +		/* relay normal message to pipe */
> +		ret = qtest_raw_send(s->msgfds.writefd, buf, strlen(buf));
> +		if (ret < 0)
> +			rte_panic("cannot relay normal message\n");
> +	}
> +}


^ permalink raw reply	[flat|nested] 120+ messages in thread

* Re: [RFC PATCH 5/5] virtio: Extend virtio-net PMD to support container environment
  2016-01-21 11:07   ` [RFC PATCH 5/5] virtio: Extend virtio-net PMD to support container environment Tetsuya Mukawa
  2016-01-22  8:14     ` Xie, Huawei
  2016-01-25 10:17     ` Xie, Huawei
@ 2016-01-25 10:29     ` Xie, Huawei
  2016-01-26  2:58       ` Tetsuya Mukawa
  2016-01-27 10:03     ` Xie, Huawei
                       ` (9 subsequent siblings)
  12 siblings, 1 reply; 120+ messages in thread
From: Xie, Huawei @ 2016-01-25 10:29 UTC (permalink / raw)
  To: Tetsuya Mukawa, dev, yuanhan.liu, Tan, Jianfeng

On 1/21/2016 7:09 PM, Tetsuya Mukawa wrote:
> +#define PCI_CONFIG_ADDR(_bus, _device, _function, _offset) ( \
> +	(1 << 31) | ((_bus) & 0xff) << 16 | ((_device) & 0x1f) << 11 | \
> +	((_function) & 0xf) << 8 | ((_offset) & 0xfc))

(_function) & 0x7 ?

^ permalink raw reply	[flat|nested] 120+ messages in thread

* Re: [RFC PATCH 5/5] virtio: Extend virtio-net PMD to support container environment
  2016-01-25 10:15         ` Xie, Huawei
@ 2016-01-26  2:58           ` Tetsuya Mukawa
  2016-01-27  9:39             ` Xie, Huawei
  0 siblings, 1 reply; 120+ messages in thread
From: Tetsuya Mukawa @ 2016-01-26  2:58 UTC (permalink / raw)
  To: Xie, Huawei, dev, yuanhan.liu, Tan, Jianfeng

On 2016/01/25 19:15, Xie, Huawei wrote:
> On 1/22/2016 6:38 PM, Tetsuya Mukawa wrote:
>> On 2016/01/22 17:14, Xie, Huawei wrote:
>>> On 1/21/2016 7:09 PM, Tetsuya Mukawa wrote:
>>>> virtio: Extend virtio-net PMD to support container environment
>>>>
>>>> The patch adds a new virtio-net PMD configuration that allows the PMD to
>>>> work on host as if the PMD is in VM.
>>>> Here is new configuration for virtio-net PMD.
>>>>  - CONFIG_RTE_LIBRTE_VIRTIO_HOST_MODE
>>>> To use this mode, EAL needs physically contiguous memory. To allocate
>>>> such memory, add "--shm" option to application command line.
>>>>
>>>> To prepare virtio-net device on host, the users need to invoke QEMU
>>>> process in special qtest mode. This mode is mainly used for testing QEMU
>>>> devices from outer process. In this mode, no guest runs.
>>>> Here is QEMU command line.
>>>>
>>>>  $ qemu-system-x86_64 \
>>>>              -machine pc-i440fx-1.4,accel=qtest \
>>>>              -display none -qtest-log /dev/null \
>>>>              -qtest unix:/tmp/socket,server \
>>>>              -netdev type=tap,script=/etc/qemu-ifup,id=net0,queues=1\
>>>>              -device virtio-net-pci,netdev=net0,mq=on \
>>>>              -chardev socket,id=chr1,path=/tmp/ivshmem,server \
>>>>              -device ivshmem,size=1G,chardev=chr1,vectors=1
>>>>
>>>>  * QEMU process is needed per port.
>>> Does qtest supports hot plug virtio-net pci device, so that we could run
>>> one QEMU process in host, which provisions the virtio-net virtual
>>> devices for the container?
>> Theoretically, we can use hot plug in some cases.
>> But I guess we have 3 concerns here.
>>
>> 1. Security.
>> If we share QEMU process between multiple DPDK applications, this QEMU
>> process will have all fds of  the applications on different containers.
>> In some cases, it will be security concern.
>> So, I guess we need to support current 1:1 configuration at least.
>>
>> 2. shared memory.
>> Currently, QEMU and DPDK application will map shared memory using same
>> virtual address.
>> So if multiple DPDK application connects to one QEMU process, each DPDK
>> application should have different address for shared memory. I guess
>> this will be a big limitation.
>>
>> 3. PCI bridge.
>> So far, QEMU has one PCI bridge, so we can connect almost 10 PCI devices
>> to QEMU.
>> (I forget correct number, but it's almost 10, because some slots are
>> reserved by QEMU)
>> A DPDK application needs both virtio-net and ivshmem device, so I guess
>> almost 5 DPDK applications can connect to one QEMU process, so far.
>> To add more PCI bridges solves this.
>> But we need to add a lot of implementation to support cascaded PCI
>> bridges and PCI devices.
>> (Also we need to solve above "2nd" concern.)
>>
>> Anyway, if we use virtio-net PMD and vhost-user PMD, QEMU process will
>> not do anything after initialization.
>> (QEMU will try to read a qtest socket, then be stopped because there is
>> no message after initialization)
>> So I guess we can ignore overhead of these QEMU processes.
>> If someone cannot ignore it, I guess this is the one of cases that it's
>> nice to use your light weight container implementation.
> Thanks for the explanation, and also in your opinion where is the best
> place to run the QEMU instance? If we run QEMU instances in host, for
> vhost-kernel support, we could get rid of the root privilege issue.

Do you mean below?
If we deploy QEMU instance on host, we can start a container without the
root privilege.
(But on host, still QEMU instance needs the privilege to access to
vhost-kernel)

If so, I agree to deploy QEMU instance on host or other privileged
container will be nice.
In the case of vhost-user, to deploy on host or non-privileged container
will be good.

>
> Another issue is do you plan to support multiple virtio devices in
> container? Currently i find the code assuming only one virtio-net device
> in QEMU, right?

Yes, so far, 1 port needs 1 QEMU instance.
So if you need multiple virtio devices, you need to invoke multiple QEMU
instances.

Do you want to deploy 1 QEMU instance for each DPDK application, even if
the application has multiple virtio-net ports?

So far, I am not sure whether we need it, because this type of DPDK
application will need only one port in most cases.
But if you need this, yes, I can implement using QEMU PCI hotplug feature.
(But probably we can only attach almost 10 ports. This will be limitation.)

>
> Btw, i have read most of your qtest code. No obvious issues found so far
> but quite a couple of nits. You must have spent a lot of time on this.
> It is great work!

I appreciate your reviewing!

BTW, my container implementation needed a QEMU patch in the case of
vhost-user.
But the patch has been merged in upstream QEMU, so we don't have this
limitation any more.

Thanks,
Tetsuya

^ permalink raw reply	[flat|nested] 120+ messages in thread

* Re: [RFC PATCH 5/5] virtio: Extend virtio-net PMD to support container environment
  2016-01-25 10:17     ` Xie, Huawei
@ 2016-01-26  2:58       ` Tetsuya Mukawa
  0 siblings, 0 replies; 120+ messages in thread
From: Tetsuya Mukawa @ 2016-01-26  2:58 UTC (permalink / raw)
  To: Xie, Huawei, dev, yuanhan.liu, Tan, Jianfeng

On 2016/01/25 19:17, Xie, Huawei wrote:
> On 1/21/2016 7:09 PM, Tetsuya Mukawa wrote:
>> +static void
>> +qtest_handle_one_message(struct qtest_session *s, char *buf)
>> +{
>> +	int ret;
>> +
>> +	if (strncmp(buf, interrupt_message, strlen(interrupt_message)) == 0) {
>> +		if (rte_atomic16_read(&s->enable_intr) == 0)
>> +			return;
>> +
>> +		/* relay interrupt to pipe */
>> +		ret = write(s->irqfds.writefd, "1", 1);
> How about the interrupt latency? Seems it is quite long.

Yes, I agree with it.
Probably using evetfd or removing this read/write mechanism to handle
interrupts will be nice.
Let me check it more.

Tetsuya

^ permalink raw reply	[flat|nested] 120+ messages in thread

* Re: [RFC PATCH 5/5] virtio: Extend virtio-net PMD to support container environment
  2016-01-25 10:29     ` Xie, Huawei
@ 2016-01-26  2:58       ` Tetsuya Mukawa
  0 siblings, 0 replies; 120+ messages in thread
From: Tetsuya Mukawa @ 2016-01-26  2:58 UTC (permalink / raw)
  To: Xie, Huawei, dev, yuanhan.liu, Tan, Jianfeng

On 2016/01/25 19:29, Xie, Huawei wrote:
> On 1/21/2016 7:09 PM, Tetsuya Mukawa wrote:
>> +#define PCI_CONFIG_ADDR(_bus, _device, _function, _offset) ( \
>> +	(1 << 31) | ((_bus) & 0xff) << 16 | ((_device) & 0x1f) << 11 | \
>> +	((_function) & 0xf) << 8 | ((_offset) & 0xfc))
> (_function) & 0x7 ?

Yes, you are correct.
I will fix it.

Thanks,
Tetsuya

^ permalink raw reply	[flat|nested] 120+ messages in thread

* Re: [RFC PATCH 5/5] virtio: Extend virtio-net PMD to support container environment
  2016-01-26  2:58           ` Tetsuya Mukawa
@ 2016-01-27  9:39             ` Xie, Huawei
  2016-01-28  2:33               ` Tetsuya Mukawa
  0 siblings, 1 reply; 120+ messages in thread
From: Xie, Huawei @ 2016-01-27  9:39 UTC (permalink / raw)
  To: Tetsuya Mukawa, dev, yuanhan.liu, Tan, Jianfeng

On 1/26/2016 10:58 AM, Tetsuya Mukawa wrote:
> On 2016/01/25 19:15, Xie, Huawei wrote:
>> On 1/22/2016 6:38 PM, Tetsuya Mukawa wrote:
>>> On 2016/01/22 17:14, Xie, Huawei wrote:
>>>> On 1/21/2016 7:09 PM, Tetsuya Mukawa wrote:
>>>>> virtio: Extend virtio-net PMD to support container environment
>>>>>
>>>>> The patch adds a new virtio-net PMD configuration that allows the PMD to
>>>>> work on host as if the PMD is in VM.
>>>>> Here is new configuration for virtio-net PMD.
>>>>>  - CONFIG_RTE_LIBRTE_VIRTIO_HOST_MODE
>>>>> To use this mode, EAL needs physically contiguous memory. To allocate
>>>>> such memory, add "--shm" option to application command line.
>>>>>
>>>>> To prepare virtio-net device on host, the users need to invoke QEMU
>>>>> process in special qtest mode. This mode is mainly used for testing QEMU
>>>>> devices from outer process. In this mode, no guest runs.
>>>>> Here is QEMU command line.
>>>>>
>>>>>  $ qemu-system-x86_64 \
>>>>>              -machine pc-i440fx-1.4,accel=qtest \
>>>>>              -display none -qtest-log /dev/null \
>>>>>              -qtest unix:/tmp/socket,server \
>>>>>              -netdev type=tap,script=/etc/qemu-ifup,id=net0,queues=1\
>>>>>              -device virtio-net-pci,netdev=net0,mq=on \
>>>>>              -chardev socket,id=chr1,path=/tmp/ivshmem,server \
>>>>>              -device ivshmem,size=1G,chardev=chr1,vectors=1
>>>>>
>>>>>  * QEMU process is needed per port.
>>>> Does qtest supports hot plug virtio-net pci device, so that we could run
>>>> one QEMU process in host, which provisions the virtio-net virtual
>>>> devices for the container?
>>> Theoretically, we can use hot plug in some cases.
>>> But I guess we have 3 concerns here.
>>>
>>> 1. Security.
>>> If we share QEMU process between multiple DPDK applications, this QEMU
>>> process will have all fds of  the applications on different containers.
>>> In some cases, it will be security concern.
>>> So, I guess we need to support current 1:1 configuration at least.
>>>
>>> 2. shared memory.
>>> Currently, QEMU and DPDK application will map shared memory using same
>>> virtual address.
>>> So if multiple DPDK application connects to one QEMU process, each DPDK
>>> application should have different address for shared memory. I guess
>>> this will be a big limitation.
>>>
>>> 3. PCI bridge.
>>> So far, QEMU has one PCI bridge, so we can connect almost 10 PCI devices
>>> to QEMU.
>>> (I forget correct number, but it's almost 10, because some slots are
>>> reserved by QEMU)
>>> A DPDK application needs both virtio-net and ivshmem device, so I guess
>>> almost 5 DPDK applications can connect to one QEMU process, so far.
>>> To add more PCI bridges solves this.
>>> But we need to add a lot of implementation to support cascaded PCI
>>> bridges and PCI devices.
>>> (Also we need to solve above "2nd" concern.)
>>>
>>> Anyway, if we use virtio-net PMD and vhost-user PMD, QEMU process will
>>> not do anything after initialization.
>>> (QEMU will try to read a qtest socket, then be stopped because there is
>>> no message after initialization)
>>> So I guess we can ignore overhead of these QEMU processes.
>>> If someone cannot ignore it, I guess this is the one of cases that it's
>>> nice to use your light weight container implementation.
>> Thanks for the explanation, and also in your opinion where is the best
>> place to run the QEMU instance? If we run QEMU instances in host, for
>> vhost-kernel support, we could get rid of the root privilege issue.
> Do you mean below?
> If we deploy QEMU instance on host, we can start a container without the
> root privilege.
> (But on host, still QEMU instance needs the privilege to access to
> vhost-kernel)

There is no issue running QEMU instance with root privilege on host, but
i think it is not acceptable granting the container root privilege.

>
> If so, I agree to deploy QEMU instance on host or other privileged
> container will be nice.
> In the case of vhost-user, to deploy on host or non-privileged container
> will be good.
>
>> Another issue is do you plan to support multiple virtio devices in
>> container? Currently i find the code assuming only one virtio-net device
>> in QEMU, right?
> Yes, so far, 1 port needs 1 QEMU instance.
> So if you need multiple virtio devices, you need to invoke multiple QEMU
> instances.
>
> Do you want to deploy 1 QEMU instance for each DPDK application, even if
> the application has multiple virtio-net ports?
>
> So far, I am not sure whether we need it, because this type of DPDK
> application will need only one port in most cases.
> But if you need this, yes, I can implement using QEMU PCI hotplug feature.
> (But probably we can only attach almost 10 ports. This will be limitation.)

I am OK with supporting one virtio device for the first version.

>
>> Btw, i have read most of your qtest code. No obvious issues found so far
>> but quite a couple of nits. You must have spent a lot of time on this.
>> It is great work!
> I appreciate your reviewing!
>
> BTW, my container implementation needed a QEMU patch in the case of
> vhost-user.
> But the patch has been merged in upstream QEMU, so we don't have this
> limitation any more.

Great, better put the QEMU dependency information in the commit message
>
> Thanks,
> Tetsuya
>


^ permalink raw reply	[flat|nested] 120+ messages in thread

* Re: [RFC PATCH 5/5] virtio: Extend virtio-net PMD to support container environment
  2016-01-21 11:07   ` [RFC PATCH 5/5] virtio: Extend virtio-net PMD to support container environment Tetsuya Mukawa
                       ` (2 preceding siblings ...)
  2016-01-25 10:29     ` Xie, Huawei
@ 2016-01-27 10:03     ` Xie, Huawei
  2016-01-28  2:44       ` Tetsuya Mukawa
  2016-01-27 15:58     ` Xie, Huawei
                       ` (8 subsequent siblings)
  12 siblings, 1 reply; 120+ messages in thread
From: Xie, Huawei @ 2016-01-27 10:03 UTC (permalink / raw)
  To: Tetsuya Mukawa, dev, yuanhan.liu, Tan, Jianfeng

On 1/21/2016 7:09 PM, Tetsuya Mukawa wrote:
> +	/* Set BAR region */
> +	for (i = 0; i < NB_BAR; i++) {
> +		switch (dev->bar[i].type) {
> +		case QTEST_PCI_BAR_IO:
> +		case QTEST_PCI_BAR_MEMORY_UNDER_1MB:
> +		case QTEST_PCI_BAR_MEMORY_32:
> +			qtest_pci_outl(s, bus, device, 0, dev->bar[i].addr,
> +				dev->bar[i].region_start);
> +			PMD_DRV_LOG(INFO, "Set BAR of %s device: 0x%lx - 0x%lx\n",
> +				dev->name, dev->bar[i].region_start,
> +				dev->bar[i].region_start + dev->bar[i].region_size);
> +			break;
> +		case QTEST_PCI_BAR_MEMORY_64:
> +			qtest_pci_outq(s, bus, device, 0, dev->bar[i].addr,
> +				dev->bar[i].region_start);
> +			PMD_DRV_LOG(INFO, "Set BAR of %s device: 0x%lx - 0x%lx\n",
> +				dev->name, dev->bar[i].region_start,
> +				dev->bar[i].region_start + dev->bar[i].region_size);
> +			break;

Hasn't the bar resource already been allocated? Is it the app's
responsibility to allocate the bar resource in qtest mode? The app
couldn't have that knowledge.

> +		case QTEST_PCI_BAR_DISABLE:
> +			break;
> +		}
> +	}
> +


^ permalink raw reply	[flat|nested] 120+ messages in thread

* Re: [RFC PATCH 5/5] virtio: Extend virtio-net PMD to support container environment
  2016-01-21 11:07   ` [RFC PATCH 5/5] virtio: Extend virtio-net PMD to support container environment Tetsuya Mukawa
                       ` (3 preceding siblings ...)
  2016-01-27 10:03     ` Xie, Huawei
@ 2016-01-27 15:58     ` Xie, Huawei
  2016-01-28  2:47       ` Tetsuya Mukawa
  2016-01-27 16:45     ` Xie, Huawei
                       ` (7 subsequent siblings)
  12 siblings, 1 reply; 120+ messages in thread
From: Xie, Huawei @ 2016-01-27 15:58 UTC (permalink / raw)
  To: Tetsuya Mukawa, dev, yuanhan.liu, Tan, Jianfeng

On 1/21/2016 7:09 PM, Tetsuya Mukawa wrote:
[snip]
> +
> +static int
> +qtest_raw_recv(int fd, char *buf, size_t count)
> +{
> +	size_t len = count;
> +	size_t total_len = 0;
> +	int ret = 0;
> +
> +	while (len > 0) {
> +		ret = read(fd, buf, len);
> +		if (ret == (int)len)
> +			break;
> +		if (*(buf + ret - 1) == '\n')
> +			break;

The above two lines should be put after the below if block.

> +		if (ret == -1) {
> +			if (errno == EINTR)
> +				continue;
> +			return ret;
> +		}
> +		total_len += ret;
> +		buf += ret;
> +		len -= ret;
> +	}
> +	return total_len + ret;
> +}
> +

[snip]

> +
> +static void
> +qtest_handle_one_message(struct qtest_session *s, char *buf)
> +{
> +	int ret;
> +
> +	if (strncmp(buf, interrupt_message, strlen(interrupt_message)) == 0) {
> +		if (rte_atomic16_read(&s->enable_intr) == 0)
> +			return;
> +
> +		/* relay interrupt to pipe */
> +		ret = write(s->irqfds.writefd, "1", 1);
> +		if (ret < 0)
> +			rte_panic("cannot relay interrupt\n");
> +	} else {
> +		/* relay normal message to pipe */
> +		ret = qtest_raw_send(s->msgfds.writefd, buf, strlen(buf));
> +		if (ret < 0)
> +			rte_panic("cannot relay normal message\n");
> +	}
> +}
> +
> +static char *
> +qtest_get_next_message(char *p)
> +{
> +	p = strchr(p, '\n');
> +	if ((p == NULL) || (*(p + 1) == '\0'))
> +		return NULL;
> +	return p + 1;
> +}
> +
> +static void
> +qtest_close_one_socket(int *fd)
> +{
> +	if (*fd > 0) {
> +		close(*fd);
> +		*fd = -1;
> +	}
> +}
> +
> +static void
> +qtest_close_sockets(struct qtest_session *s)
> +{
> +	qtest_close_one_socket(&s->qtest_socket);
> +	qtest_close_one_socket(&s->msgfds.readfd);
> +	qtest_close_one_socket(&s->msgfds.writefd);
> +	qtest_close_one_socket(&s->irqfds.readfd);
> +	qtest_close_one_socket(&s->irqfds.writefd);
> +	qtest_close_one_socket(&s->ivshmem_socket);
> +}
> +
> +/*
> + * This thread relays QTest response using pipe.
> + * The function is needed because we need to separate IRQ message from others.
> + */
> +static void *
> +qtest_event_handler(void *data) {
> +	struct qtest_session *s = (struct qtest_session *)data;
> +	char buf[1024];
> +	char *p;
> +	int ret;
> +
> +	for (;;) {
> +		memset(buf, 0, sizeof(buf));
> +		ret = qtest_raw_recv(s->qtest_socket, buf, sizeof(buf));
> +		if (ret < 0) {
> +			qtest_close_sockets(s);
> +			return NULL;
> +		}
> +
> +		/* may receive multiple messages at the same time */

>From the qtest_raw_recv implementation, if at some point one message is
received by two qtest_raw_recv calls, then is that message discarded?
We could save the last incomplete message in buffer, and combine the
message received next time together.

> +		p = buf;
> +		do {
> +			qtest_handle_one_message(s, p);
> +		} while ((p = qtest_get_next_message(p)) != NULL);
> +	}
> +	return NULL;
> +}
> +


^ permalink raw reply	[flat|nested] 120+ messages in thread

* Re: [RFC PATCH 5/5] virtio: Extend virtio-net PMD to support container environment
  2016-01-21 11:07   ` [RFC PATCH 5/5] virtio: Extend virtio-net PMD to support container environment Tetsuya Mukawa
                       ` (4 preceding siblings ...)
  2016-01-27 15:58     ` Xie, Huawei
@ 2016-01-27 16:45     ` Xie, Huawei
  2016-01-28  2:47       ` Tetsuya Mukawa
  2016-01-29  8:57     ` Yuanhan Liu
                       ` (6 subsequent siblings)
  12 siblings, 1 reply; 120+ messages in thread
From: Xie, Huawei @ 2016-01-27 16:45 UTC (permalink / raw)
  To: Tetsuya Mukawa, dev, yuanhan.liu, Tan, Jianfeng

On 1/21/2016 7:09 PM, Tetsuya Mukawa wrote:
> +qtest_find_pci_device(struct qtest_session *s, uint16_t bus, uint8_t device)
> +{
> +	struct qtest_pci_device *dev;
> +	uint32_t val;
> +
> +	val = qtest_pci_inl(s, bus, device, 0, 0);
> +	TAILQ_FOREACH(dev, &s->head, next) {
> +		if (val == ((uint32_t)dev->device_id << 16 | dev->vendor_id)) {
> +			dev->bus_addr = bus;
> +			dev->device_addr = device;
> +			return;
> +		}
> +
> +	}
> +}
> +
> +static int
> +qtest_init_pci_devices(struct qtest_session *s)
> +{
> +	struct qtest_pci_device *dev;
> +	uint16_t bus;
> +	uint8_t device;
> +	int ret;
> +
> +	/* Find devices */
> +	bus = 0;
> +	do {
> +		device = 0;
> +		do {
> +			qtest_find_pci_device(s, bus, device);
> +		} while (device++ != NB_DEVICE - 1);
> +	} while (bus++ != NB_BUS - 1);

Seems this scan of all the pci devices is very time consuming operation,
and each scan involves socket communication.
Do you measure how long it takes to do the pci devices initialization?

> +
> +	/* Initialize devices */
> +	TAILQ_FOREACH(dev, &s->head, next) {
> +		ret = dev->init(s, dev);
> +		if (ret != 0)
> +			return ret;
> +	}
> +
> +	return 0;


^ permalink raw reply	[flat|nested] 120+ messages in thread

* Re: [RFC PATCH 5/5] virtio: Extend virtio-net PMD to support container environment
  2016-01-27  9:39             ` Xie, Huawei
@ 2016-01-28  2:33               ` Tetsuya Mukawa
  0 siblings, 0 replies; 120+ messages in thread
From: Tetsuya Mukawa @ 2016-01-28  2:33 UTC (permalink / raw)
  To: Xie, Huawei, dev, yuanhan.liu, Tan, Jianfeng

On 2016/01/27 18:39, Xie, Huawei wrote:
> On 1/26/2016 10:58 AM, Tetsuya Mukawa wrote:
>> On 2016/01/25 19:15, Xie, Huawei wrote:
>>
>> BTW, my container implementation needed a QEMU patch in the case of
>> vhost-user.
>> But the patch has been merged in upstream QEMU, so we don't have this
>> limitation any more.
> Great, better put the QEMU dependency information in the commit message

Thanks for all your comments and carefully reviewing.

So far, I am not sure what is next QEMU version.
But I will add it after QEMU releases new one.

Tetsuya

^ permalink raw reply	[flat|nested] 120+ messages in thread

* Re: [RFC PATCH 5/5] virtio: Extend virtio-net PMD to support container environment
  2016-01-27 10:03     ` Xie, Huawei
@ 2016-01-28  2:44       ` Tetsuya Mukawa
  2016-01-29  8:56         ` Xie, Huawei
  0 siblings, 1 reply; 120+ messages in thread
From: Tetsuya Mukawa @ 2016-01-28  2:44 UTC (permalink / raw)
  To: Xie, Huawei, dev, yuanhan.liu, Tan, Jianfeng

On 2016/01/27 19:03, Xie, Huawei wrote:
> On 1/21/2016 7:09 PM, Tetsuya Mukawa wrote:
>> +	/* Set BAR region */
>> +	for (i = 0; i < NB_BAR; i++) {
>> +		switch (dev->bar[i].type) {
>> +		case QTEST_PCI_BAR_IO:
>> +		case QTEST_PCI_BAR_MEMORY_UNDER_1MB:
>> +		case QTEST_PCI_BAR_MEMORY_32:
>> +			qtest_pci_outl(s, bus, device, 0, dev->bar[i].addr,
>> +				dev->bar[i].region_start);
>> +			PMD_DRV_LOG(INFO, "Set BAR of %s device: 0x%lx - 0x%lx\n",
>> +				dev->name, dev->bar[i].region_start,
>> +				dev->bar[i].region_start + dev->bar[i].region_size);
>> +			break;
>> +		case QTEST_PCI_BAR_MEMORY_64:
>> +			qtest_pci_outq(s, bus, device, 0, dev->bar[i].addr,
>> +				dev->bar[i].region_start);
>> +			PMD_DRV_LOG(INFO, "Set BAR of %s device: 0x%lx - 0x%lx\n",
>> +				dev->name, dev->bar[i].region_start,
>> +				dev->bar[i].region_start + dev->bar[i].region_size);
>> +			break;
> Hasn't the bar resource already been allocated? Is it the app's
> responsibility to allocate the bar resource in qtest mode? The app
> couldn't have that knowledge.

Yes. In qtest mode, the app should register above values.
(Without it, default values are 0)
Usually, this will be done by BIOS or uEFI. But in qtest mode, these
will not be invoked.
So we need to define above values, and also need to enable PCI devices.

In this release, I just register hard coded values except for one of
ivshmem BAR.
In next release, I will describe memory map in comment.

Tetsuya

^ permalink raw reply	[flat|nested] 120+ messages in thread

* Re: [RFC PATCH 5/5] virtio: Extend virtio-net PMD to support container environment
  2016-01-27 15:58     ` Xie, Huawei
@ 2016-01-28  2:47       ` Tetsuya Mukawa
  2016-01-28  9:48         ` Xie, Huawei
  0 siblings, 1 reply; 120+ messages in thread
From: Tetsuya Mukawa @ 2016-01-28  2:47 UTC (permalink / raw)
  To: Xie, Huawei, dev, yuanhan.liu, Tan, Jianfeng

On 2016/01/28 0:58, Xie, Huawei wrote:
> On 1/21/2016 7:09 PM, Tetsuya Mukawa wrote:
> [snip]
>> +
>> +static int
>> +qtest_raw_recv(int fd, char *buf, size_t count)
>> +{
>> +	size_t len = count;
>> +	size_t total_len = 0;
>> +	int ret = 0;
>> +
>> +	while (len > 0) {
>> +		ret = read(fd, buf, len);
>> +		if (ret == (int)len)
>> +			break;
>> +		if (*(buf + ret - 1) == '\n')
>> +			break;
> The above two lines should be put after the below if block.

Yes, it should be so.

>
>> +		if (ret == -1) {
>> +			if (errno == EINTR)
>> +				continue;
>> +			return ret;
>> +		}
>> +		total_len += ret;
>> +		buf += ret;
>> +		len -= ret;
>> +	}
>> +	return total_len + ret;
>> +}
>> +
> [snip]
>
>> +
>> +static void
>> +qtest_handle_one_message(struct qtest_session *s, char *buf)
>> +{
>> +	int ret;
>> +
>> +	if (strncmp(buf, interrupt_message, strlen(interrupt_message)) == 0) {
>> +		if (rte_atomic16_read(&s->enable_intr) == 0)
>> +			return;
>> +
>> +		/* relay interrupt to pipe */
>> +		ret = write(s->irqfds.writefd, "1", 1);
>> +		if (ret < 0)
>> +			rte_panic("cannot relay interrupt\n");
>> +	} else {
>> +		/* relay normal message to pipe */
>> +		ret = qtest_raw_send(s->msgfds.writefd, buf, strlen(buf));
>> +		if (ret < 0)
>> +			rte_panic("cannot relay normal message\n");
>> +	}
>> +}
>> +
>> +static char *
>> +qtest_get_next_message(char *p)
>> +{
>> +	p = strchr(p, '\n');
>> +	if ((p == NULL) || (*(p + 1) == '\0'))
>> +		return NULL;
>> +	return p + 1;
>> +}
>> +
>> +static void
>> +qtest_close_one_socket(int *fd)
>> +{
>> +	if (*fd > 0) {
>> +		close(*fd);
>> +		*fd = -1;
>> +	}
>> +}
>> +
>> +static void
>> +qtest_close_sockets(struct qtest_session *s)
>> +{
>> +	qtest_close_one_socket(&s->qtest_socket);
>> +	qtest_close_one_socket(&s->msgfds.readfd);
>> +	qtest_close_one_socket(&s->msgfds.writefd);
>> +	qtest_close_one_socket(&s->irqfds.readfd);
>> +	qtest_close_one_socket(&s->irqfds.writefd);
>> +	qtest_close_one_socket(&s->ivshmem_socket);
>> +}
>> +
>> +/*
>> + * This thread relays QTest response using pipe.
>> + * The function is needed because we need to separate IRQ message from others.
>> + */
>> +static void *
>> +qtest_event_handler(void *data) {
>> +	struct qtest_session *s = (struct qtest_session *)data;
>> +	char buf[1024];
>> +	char *p;
>> +	int ret;
>> +
>> +	for (;;) {
>> +		memset(buf, 0, sizeof(buf));
>> +		ret = qtest_raw_recv(s->qtest_socket, buf, sizeof(buf));
>> +		if (ret < 0) {
>> +			qtest_close_sockets(s);
>> +			return NULL;
>> +		}
>> +
>> +		/* may receive multiple messages at the same time */
> From the qtest_raw_recv implementation, if at some point one message is
> received by two qtest_raw_recv calls, then is that message discarded?
> We could save the last incomplete message in buffer, and combine the
> message received next time together.

I guess we don't lose replies from QEMU.
Please let me describe more.

According to the qtest specification, after sending a message, we need
to receive a reply like below.
APP: ---command---> QEMU
APP: <-----------OK---- QEMU

But, to handle interrupt message, we need to take care below case.
APP: ---command---> QEMU
APP: <---interrupt---- QEMU
APP: <-----------OK---- QEMU

Also, we need to handle a case like multiple threads tries to send a
qtest message.
Anyway, here is current implementation.

So far, we have 3 types of sockets.
1. socket for qtest messaging.
2. socket for relaying normal message.
3. socket for relaying interrupt message.

About read direction:
The qtest socket is only read by "qtest_event_handler". The handler may
receive multiple messages at once.
In the case,  the handler split messages, and send it to normal message
socket or interrupt message socket.

About write direction:
The qtest socket will be written by below functions.
 - qtest_raw_in/out
 - qtest_raw_read/write
But all functions that use above functions need to have mutex before
sending messages.
So all messaging will not be overlapped, then only one thread will read
the socket for relaying normal message.

Tetsuya

^ permalink raw reply	[flat|nested] 120+ messages in thread

* Re: [RFC PATCH 5/5] virtio: Extend virtio-net PMD to support container environment
  2016-01-27 16:45     ` Xie, Huawei
@ 2016-01-28  2:47       ` Tetsuya Mukawa
  2016-01-28  6:15         ` Xie, Huawei
  0 siblings, 1 reply; 120+ messages in thread
From: Tetsuya Mukawa @ 2016-01-28  2:47 UTC (permalink / raw)
  To: Xie, Huawei, dev, yuanhan.liu, Tan, Jianfeng

On 2016/01/28 1:45, Xie, Huawei wrote:
> On 1/21/2016 7:09 PM, Tetsuya Mukawa wrote:
>> +qtest_find_pci_device(struct qtest_session *s, uint16_t bus, uint8_t device)
>> +{
>> +	struct qtest_pci_device *dev;
>> +	uint32_t val;
>> +
>> +	val = qtest_pci_inl(s, bus, device, 0, 0);
>> +	TAILQ_FOREACH(dev, &s->head, next) {
>> +		if (val == ((uint32_t)dev->device_id << 16 | dev->vendor_id)) {
>> +			dev->bus_addr = bus;
>> +			dev->device_addr = device;
>> +			return;
>> +		}
>> +
>> +	}
>> +}
>> +
>> +static int
>> +qtest_init_pci_devices(struct qtest_session *s)
>> +{
>> +	struct qtest_pci_device *dev;
>> +	uint16_t bus;
>> +	uint8_t device;
>> +	int ret;
>> +
>> +	/* Find devices */
>> +	bus = 0;
>> +	do {
>> +		device = 0;
>> +		do {
>> +			qtest_find_pci_device(s, bus, device);
>> +		} while (device++ != NB_DEVICE - 1);
>> +	} while (bus++ != NB_BUS - 1);
> Seems this scan of all the pci devices is very time consuming operation,
> and each scan involves socket communication.
> Do you measure how long it takes to do the pci devices initialization?

I measured it, and seems it takes 0.35 seconds in my environment.
This will be done only once when the port is initialized. Probably it's
not so heady.

Tetsuya

^ permalink raw reply	[flat|nested] 120+ messages in thread

* Re: [RFC PATCH 5/5] virtio: Extend virtio-net PMD to support container environment
  2016-01-28  2:47       ` Tetsuya Mukawa
@ 2016-01-28  6:15         ` Xie, Huawei
  2016-01-28  6:29           ` Tetsuya Mukawa
  0 siblings, 1 reply; 120+ messages in thread
From: Xie, Huawei @ 2016-01-28  6:15 UTC (permalink / raw)
  To: Tetsuya Mukawa, dev, yuanhan.liu, Tan, Jianfeng

On 1/28/2016 10:48 AM, Tetsuya Mukawa wrote:
> I measured it, and seems it takes 0.35 seconds in my environment.
> This will be done only once when the port is initialized. Probably it's
> not so heady.

There are 256 x 32 loop of pci scan. That is too long if we dynamically
start/tear down the container, otherwise it is ok. Some people are
struggling reducing the VM booting time from seconds to milliseconds to
compete with container technology. Let us consider if we could optimize
this.
For example, QEMU supports specifying bus/dev for a device in its
commandline, so could we assign fixed bus for virtio-net and ivshm
device? And for piix3, is it on bus 0/1?


^ permalink raw reply	[flat|nested] 120+ messages in thread

* Re: [RFC PATCH 5/5] virtio: Extend virtio-net PMD to support container environment
  2016-01-28  6:15         ` Xie, Huawei
@ 2016-01-28  6:29           ` Tetsuya Mukawa
  0 siblings, 0 replies; 120+ messages in thread
From: Tetsuya Mukawa @ 2016-01-28  6:29 UTC (permalink / raw)
  To: Xie, Huawei, dev, yuanhan.liu, Tan, Jianfeng

On 2016/01/28 15:15, Xie, Huawei wrote:
> On 1/28/2016 10:48 AM, Tetsuya Mukawa wrote:
>> I measured it, and seems it takes 0.35 seconds in my environment.
>> This will be done only once when the port is initialized. Probably it's
>> not so heady.
> There are 256 x 32 loop of pci scan. That is too long if we dynamically
> start/tear down the container, otherwise it is ok. Some people are
> struggling reducing the VM booting time from seconds to milliseconds to
> compete with container technology. Let us consider if we could optimize
> this.
> For example, QEMU supports specifying bus/dev for a device in its
> commandline, so could we assign fixed bus for virtio-net and ivshm
> device? And for piix3, is it on bus 0/1?
>

OK, I understand the necessity. Let's consider it.
So far, the users doesn't need to specify pci address on QEMU command
line and DPDK vdev option.
But, let's change this, then we can remove this looping.

Probably specifying pci address on vdev option will not be mandatory.
if not specified, just using default value is nice.
I will fix like above in next release.

Tetsuya

^ permalink raw reply	[flat|nested] 120+ messages in thread

* [PATCH v2 0/3] virtio: Add a new layer to abstract pci access method
  2016-01-18  9:13 ` [PATCH 1/3] virtio: Change the parameter order of io_write8/16/32() Tetsuya Mukawa
                     ` (5 preceding siblings ...)
  2016-01-21 11:07   ` [RFC PATCH 5/5] virtio: Extend virtio-net PMD to support container environment Tetsuya Mukawa
@ 2016-01-28  9:33   ` Tetsuya Mukawa
  2016-01-28  9:33   ` [PATCH v2 1/3] virtio: Change the parameter order of io_write8/16/32() Tetsuya Mukawa
                     ` (2 subsequent siblings)
  9 siblings, 0 replies; 120+ messages in thread
From: Tetsuya Mukawa @ 2016-01-28  9:33 UTC (permalink / raw)
  To: dev

The patches abstract pci access method of virtio-net PMD.
The patch should be on Yuanhan's below patch series.
 - [PATCH v6 0/9] virtio 1.0 enabling for virtio pmd driver.

PATCH v2 changes
 - Rebase on Yuanhan's v6 patches.
 - split virtio_pci_access_ops in 2 different structures.
 - some refactoring.


Tetsuya Mukawa (3):
  virtio: Change the parameter order of io_write8/16/32()
  virtio: move rte_eal_pci_unmap_device() to virtio_pci.c
  virtio: Add a new layer to abstract pci access method

 drivers/net/virtio/virtio_ethdev.c |   4 +-
 drivers/net/virtio/virtio_pci.c    | 554 +++++++++++++++++++++++++------------
 drivers/net/virtio/virtio_pci.h    |  23 +-
 3 files changed, 403 insertions(+), 178 deletions(-)

-- 
2.1.4

^ permalink raw reply	[flat|nested] 120+ messages in thread

* [PATCH v2 1/3] virtio: Change the parameter order of io_write8/16/32()
  2016-01-18  9:13 ` [PATCH 1/3] virtio: Change the parameter order of io_write8/16/32() Tetsuya Mukawa
                     ` (6 preceding siblings ...)
  2016-01-28  9:33   ` [PATCH v2 0/3] virtio: Add a new layer to abstract pci access method Tetsuya Mukawa
@ 2016-01-28  9:33   ` Tetsuya Mukawa
  2016-01-28  9:33   ` [PATCH v2 2/3] virtio: move rte_eal_pci_unmap_device() to virtio_pci.c Tetsuya Mukawa
  2016-01-28  9:33   ` [PATCH v2 3/3] virtio: Add a new layer to abstract pci access method Tetsuya Mukawa
  9 siblings, 0 replies; 120+ messages in thread
From: Tetsuya Mukawa @ 2016-01-28  9:33 UTC (permalink / raw)
  To: dev

The patch change the parameter order of below functions.
 - io_write8()
 - io_write16()
 - io_write32()
This changig are needed to add a new layer to abstract accessing
method.

Signed-off-by: Tetsuya Mukawa <mukawa@igel.co.jp>
---
 drivers/net/virtio/virtio_pci.c | 70 ++++++++++++++++++++---------------------
 1 file changed, 35 insertions(+), 35 deletions(-)

diff --git a/drivers/net/virtio/virtio_pci.c b/drivers/net/virtio/virtio_pci.c
index e16104e..1fca39f 100644
--- a/drivers/net/virtio/virtio_pci.c
+++ b/drivers/net/virtio/virtio_pci.c
@@ -477,7 +477,7 @@ io_read8(uint8_t *addr)
 }
 
 static inline void
-io_write8(uint8_t val, uint8_t *addr)
+io_write8(uint8_t *addr, uint8_t val)
 {
 	*(volatile uint8_t *)addr = val;
 }
@@ -489,7 +489,7 @@ io_read16(uint16_t *addr)
 }
 
 static inline void
-io_write16(uint16_t val, uint16_t *addr)
+io_write16(uint16_t *addr, uint16_t val)
 {
 	*(volatile uint16_t *)addr = val;
 }
@@ -501,16 +501,16 @@ io_read32(uint32_t *addr)
 }
 
 static inline void
-io_write32(uint32_t val, uint32_t *addr)
+io_write32(uint32_t *addr, uint32_t val)
 {
 	*(volatile uint32_t *)addr = val;
 }
 
 static inline void
-io_write64_twopart(uint64_t val, uint32_t *lo, uint32_t *hi)
+io_write64_twopart(uint32_t *lo, uint32_t *hi, uint64_t val)
 {
-	io_write32(val & ((1ULL << 32) - 1), lo);
-	io_write32(val >> 32,		     hi);
+	io_write32(lo, val & ((1ULL << 32) - 1));
+	io_write32(hi, val >> 32);
 }
 
 static void
@@ -540,7 +540,7 @@ modern_write_dev_config(struct virtio_hw *hw, size_t offset,
 	const uint8_t *p = src;
 
 	for (i = 0;  i < length; i++)
-		io_write8(*p++, (uint8_t *)hw->dev_cfg + offset + i);
+		io_write8((uint8_t *)hw->dev_cfg + offset + i, *p++);
 }
 
 static uint64_t
@@ -548,10 +548,10 @@ modern_get_features(struct virtio_hw *hw)
 {
 	uint32_t features_lo, features_hi;
 
-	io_write32(0, &hw->common_cfg->device_feature_select);
+	io_write32(&hw->common_cfg->device_feature_select, 0);
 	features_lo = io_read32(&hw->common_cfg->device_feature);
 
-	io_write32(1, &hw->common_cfg->device_feature_select);
+	io_write32(&hw->common_cfg->device_feature_select, 1);
 	features_hi = io_read32(&hw->common_cfg->device_feature);
 
 	return ((uint64_t)features_hi << 32) | features_lo;
@@ -560,13 +560,13 @@ modern_get_features(struct virtio_hw *hw)
 static void
 modern_set_features(struct virtio_hw *hw, uint64_t features)
 {
-	io_write32(0, &hw->common_cfg->guest_feature_select);
-	io_write32(features & ((1ULL << 32) - 1),
-		&hw->common_cfg->guest_feature);
+	io_write32(&hw->common_cfg->guest_feature_select, 0);
+	io_write32(&hw->common_cfg->guest_feature,
+		   features & ((1ULL << 32) - 1));
 
-	io_write32(1, &hw->common_cfg->guest_feature_select);
-	io_write32(features >> 32,
-		&hw->common_cfg->guest_feature);
+	io_write32(&hw->common_cfg->guest_feature_select, 1);
+	io_write32(&hw->common_cfg->guest_feature,
+		   features >> 32);
 }
 
 static uint8_t
@@ -578,7 +578,7 @@ modern_get_status(struct virtio_hw *hw)
 static void
 modern_set_status(struct virtio_hw *hw, uint8_t status)
 {
-	io_write8(status, &hw->common_cfg->device_status);
+	io_write8(&hw->common_cfg->device_status, status);
 }
 
 static void
@@ -597,14 +597,14 @@ modern_get_isr(struct virtio_hw *hw)
 static uint16_t
 modern_set_config_irq(struct virtio_hw *hw, uint16_t vec)
 {
-	io_write16(vec, &hw->common_cfg->msix_config);
+	io_write16(&hw->common_cfg->msix_config, vec);
 	return io_read16(&hw->common_cfg->msix_config);
 }
 
 static uint16_t
 modern_get_queue_num(struct virtio_hw *hw, uint16_t queue_id)
 {
-	io_write16(queue_id, &hw->common_cfg->queue_select);
+	io_write16(&hw->common_cfg->queue_select, queue_id);
 	return io_read16(&hw->common_cfg->queue_size);
 }
 
@@ -620,20 +620,20 @@ modern_setup_queue(struct virtio_hw *hw, struct virtqueue *vq)
 							 ring[vq->vq_nentries]),
 				   VIRTIO_PCI_VRING_ALIGN);
 
-	io_write16(vq->vq_queue_index, &hw->common_cfg->queue_select);
+	io_write16(&hw->common_cfg->queue_select, vq->vq_queue_index);
 
-	io_write64_twopart(desc_addr, &hw->common_cfg->queue_desc_lo,
-				      &hw->common_cfg->queue_desc_hi);
-	io_write64_twopart(avail_addr, &hw->common_cfg->queue_avail_lo,
-				       &hw->common_cfg->queue_avail_hi);
-	io_write64_twopart(used_addr, &hw->common_cfg->queue_used_lo,
-				      &hw->common_cfg->queue_used_hi);
+	io_write64_twopart(&hw->common_cfg->queue_desc_lo,
+			   &hw->common_cfg->queue_desc_hi, desc_addr);
+	io_write64_twopart(&hw->common_cfg->queue_avail_lo,
+			   &hw->common_cfg->queue_avail_hi, avail_addr);
+	io_write64_twopart(&hw->common_cfg->queue_used_lo,
+			   &hw->common_cfg->queue_used_hi, used_addr);
 
 	notify_off = io_read16(&hw->common_cfg->queue_notify_off);
 	vq->notify_addr = (void *)((uint8_t *)hw->notify_base +
 				notify_off * hw->notify_off_multiplier);
 
-	io_write16(1, &hw->common_cfg->queue_enable);
+	io_write16(&hw->common_cfg->queue_enable, 1);
 
 	PMD_INIT_LOG(DEBUG, "queue %u addresses:", vq->vq_queue_index);
 	PMD_INIT_LOG(DEBUG, "\t desc_addr: %"PRIx64, desc_addr);
@@ -646,22 +646,22 @@ modern_setup_queue(struct virtio_hw *hw, struct virtqueue *vq)
 static void
 modern_del_queue(struct virtio_hw *hw, struct virtqueue *vq)
 {
-	io_write16(vq->vq_queue_index, &hw->common_cfg->queue_select);
+	io_write16(&hw->common_cfg->queue_select, vq->vq_queue_index);
 
-	io_write64_twopart(0, &hw->common_cfg->queue_desc_lo,
-				  &hw->common_cfg->queue_desc_hi);
-	io_write64_twopart(0, &hw->common_cfg->queue_avail_lo,
-				  &hw->common_cfg->queue_avail_hi);
-	io_write64_twopart(0, &hw->common_cfg->queue_used_lo,
-				  &hw->common_cfg->queue_used_hi);
+	io_write64_twopart(&hw->common_cfg->queue_desc_lo,
+			   &hw->common_cfg->queue_desc_hi, 0);
+	io_write64_twopart(&hw->common_cfg->queue_avail_lo,
+			   &hw->common_cfg->queue_avail_hi, 0);
+	io_write64_twopart(&hw->common_cfg->queue_used_lo,
+			   &hw->common_cfg->queue_used_hi, 0);
 
-	io_write16(0, &hw->common_cfg->queue_enable);
+	io_write16(&hw->common_cfg->queue_enable, 0);
 }
 
 static void
 modern_notify_queue(struct virtio_hw *hw __rte_unused, struct virtqueue *vq)
 {
-	io_write16(1, vq->notify_addr);
+	io_write16(vq->notify_addr, 1);
 }
 
 static const struct virtio_pci_ops modern_ops = {
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 120+ messages in thread

* [PATCH v2 2/3] virtio: move rte_eal_pci_unmap_device() to virtio_pci.c
  2016-01-18  9:13 ` [PATCH 1/3] virtio: Change the parameter order of io_write8/16/32() Tetsuya Mukawa
                     ` (7 preceding siblings ...)
  2016-01-28  9:33   ` [PATCH v2 1/3] virtio: Change the parameter order of io_write8/16/32() Tetsuya Mukawa
@ 2016-01-28  9:33   ` Tetsuya Mukawa
  2016-01-28  9:33   ` [PATCH v2 3/3] virtio: Add a new layer to abstract pci access method Tetsuya Mukawa
  9 siblings, 0 replies; 120+ messages in thread
From: Tetsuya Mukawa @ 2016-01-28  9:33 UTC (permalink / raw)
  To: dev

To abstract pci access method, the patch moves below function
to "virtio_pci.c".
 - rte_eal_pci_unmap_device()

Signed-off-by: Tetsuya Mukawa <mukawa@igel.co.jp>
---
 drivers/net/virtio/virtio_ethdev.c |  2 +-
 drivers/net/virtio/virtio_pci.c    | 11 +++++++++++
 drivers/net/virtio/virtio_pci.h    |  1 +
 3 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c
index deb0382..37833a8 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -1177,7 +1177,7 @@ eth_virtio_dev_uninit(struct rte_eth_dev *eth_dev)
 		rte_intr_callback_unregister(&pci_dev->intr_handle,
 						virtio_interrupt_handler,
 						eth_dev);
-	rte_eal_pci_unmap_device(pci_dev);
+	vtpci_uninit(pci_dev, hw);
 
 	PMD_INIT_LOG(DEBUG, "dev_uninit completed");
 
diff --git a/drivers/net/virtio/virtio_pci.c b/drivers/net/virtio/virtio_pci.c
index 1fca39f..3e6be8c 100644
--- a/drivers/net/virtio/virtio_pci.c
+++ b/drivers/net/virtio/virtio_pci.c
@@ -892,3 +892,14 @@ vtpci_init(struct rte_pci_device *dev, struct virtio_hw *hw)
 
 	return 0;
 }
+
+void
+vtpci_uninit(struct rte_pci_device *dev, struct virtio_hw *hw)
+{
+	hw->dev  = NULL;
+	hw->vtpci_ops = NULL;
+	hw->use_msix = 0;
+	hw->io_base  = 0;
+	hw->modern   = 0;
+	rte_eal_pci_unmap_device(dev);
+}
diff --git a/drivers/net/virtio/virtio_pci.h b/drivers/net/virtio/virtio_pci.h
index 0544a07..17c7972 100644
--- a/drivers/net/virtio/virtio_pci.h
+++ b/drivers/net/virtio/virtio_pci.h
@@ -328,6 +328,7 @@ vtpci_with_feature(struct virtio_hw *hw, uint64_t bit)
  * Function declaration from virtio_pci.c
  */
 int vtpci_init(struct rte_pci_device *, struct virtio_hw *);
+void vtpci_uninit(struct rte_pci_device *dev, struct virtio_hw *);
 void vtpci_reset(struct virtio_hw *);
 
 void vtpci_reinit_complete(struct virtio_hw *);
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 120+ messages in thread

* [PATCH v2 3/3] virtio: Add a new layer to abstract pci access method
  2016-01-18  9:13 ` [PATCH 1/3] virtio: Change the parameter order of io_write8/16/32() Tetsuya Mukawa
                     ` (8 preceding siblings ...)
  2016-01-28  9:33   ` [PATCH v2 2/3] virtio: move rte_eal_pci_unmap_device() to virtio_pci.c Tetsuya Mukawa
@ 2016-01-28  9:33   ` Tetsuya Mukawa
  2016-01-29  9:17     ` Yuanhan Liu
  9 siblings, 1 reply; 120+ messages in thread
From: Tetsuya Mukawa @ 2016-01-28  9:33 UTC (permalink / raw)
  To: dev

This patch addss function pointers to abstract pci access method.
This abstraction layer will be used when virtio-net PMD supports
container extension.

The below functions abstract how to access to pci configuration space.

struct virtio_pci_cfg_ops {
        int   (*map)(...);
        void  (*unmap)(...);
        void *(*get_mapped_addr)(...);
        int   (*read)(...);
};

The pci configuration space has information how to access to virtio
device registers. Basically, there are 2 ways to acccess to the
registers. One is using portio and the other is using mapped memory.
The below functions abstract this access method.

struct virtio_pci_dev_ops {
        uint8_t  (*read8)(...);
        uint16_t (*read16)(...);
        uint32_t (*read32)(...);
        void     (*write8)(...);
        void     (*write16)(...);
        void     (*write32)(...);
};

Signed-off-by: Tetsuya Mukawa <mukawa@igel.co.jp>
---
 drivers/net/virtio/virtio_ethdev.c |   4 +-
 drivers/net/virtio/virtio_pci.c    | 531 +++++++++++++++++++++++++------------
 drivers/net/virtio/virtio_pci.h    |  24 +-
 3 files changed, 386 insertions(+), 173 deletions(-)

diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c
index 37833a8..c477b05 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -1037,7 +1037,7 @@ eth_virtio_dev_init(struct rte_eth_dev *eth_dev)
 
 	pci_dev = eth_dev->pci_dev;
 
-	if (vtpci_init(pci_dev, hw) < 0)
+	if (vtpci_init(eth_dev, hw) < 0)
 		return -1;
 
 	/* Reset the device although not necessary at startup */
@@ -1177,7 +1177,7 @@ eth_virtio_dev_uninit(struct rte_eth_dev *eth_dev)
 		rte_intr_callback_unregister(&pci_dev->intr_handle,
 						virtio_interrupt_handler,
 						eth_dev);
-	vtpci_uninit(pci_dev, hw);
+	vtpci_uninit(eth_dev, hw);
 
 	PMD_INIT_LOG(DEBUG, "dev_uninit completed");
 
diff --git a/drivers/net/virtio/virtio_pci.c b/drivers/net/virtio/virtio_pci.c
index 3e6be8c..c6d72f9 100644
--- a/drivers/net/virtio/virtio_pci.c
+++ b/drivers/net/virtio/virtio_pci.c
@@ -49,24 +49,198 @@
 #define PCI_CAPABILITY_LIST	0x34
 #define PCI_CAP_ID_VNDR		0x09
 
+static uint8_t
+phys_legacy_read8(struct virtio_hw *hw, uint8_t *addr)
+{
+	return inb((unsigned short)(hw->io_base + (uint64_t)addr));
+}
 
-#define VIRTIO_PCI_REG_ADDR(hw, reg) \
-	(unsigned short)((hw)->io_base + (reg))
+static uint16_t
+phys_legacy_read16(struct virtio_hw *hw, uint16_t *addr)
+{
+	return inw((unsigned short)(hw->io_base + (uint64_t)addr));
+}
 
-#define VIRTIO_READ_REG_1(hw, reg) \
-	inb((VIRTIO_PCI_REG_ADDR((hw), (reg))))
-#define VIRTIO_WRITE_REG_1(hw, reg, value) \
-	outb_p((unsigned char)(value), (VIRTIO_PCI_REG_ADDR((hw), (reg))))
+static uint32_t
+phys_legacy_read32(struct virtio_hw *hw, uint32_t *addr)
+{
+	return inl((unsigned short)(hw->io_base + (uint64_t)addr));
+}
 
-#define VIRTIO_READ_REG_2(hw, reg) \
-	inw((VIRTIO_PCI_REG_ADDR((hw), (reg))))
-#define VIRTIO_WRITE_REG_2(hw, reg, value) \
-	outw_p((unsigned short)(value), (VIRTIO_PCI_REG_ADDR((hw), (reg))))
+static void
+phys_legacy_write8(struct virtio_hw *hw, uint8_t *addr, uint8_t val)
+{
+	return outb_p((unsigned char)val,
+			(unsigned short)(hw->io_base + (uint64_t)addr));
+}
 
-#define VIRTIO_READ_REG_4(hw, reg) \
-	inl((VIRTIO_PCI_REG_ADDR((hw), (reg))))
-#define VIRTIO_WRITE_REG_4(hw, reg, value) \
-	outl_p((unsigned int)(value), (VIRTIO_PCI_REG_ADDR((hw), (reg))))
+static void
+phys_legacy_write16(struct virtio_hw *hw, uint16_t *addr, uint16_t val)
+{
+	return outw_p((unsigned short)val,
+			(unsigned short)(hw->io_base + (uint64_t)addr));
+}
+
+static void
+phys_legacy_write32(struct virtio_hw *hw, uint32_t *addr, uint32_t val)
+{
+	return outl_p((unsigned int)val,
+			(unsigned short)(hw->io_base + (uint64_t)addr));
+}
+
+static const struct virtio_pci_dev_ops phys_legacy_dev_ops = {
+	.read8		= phys_legacy_read8,
+	.read16		= phys_legacy_read16,
+	.read32		= phys_legacy_read32,
+	.write8		= phys_legacy_write8,
+	.write16	= phys_legacy_write16,
+	.write32	= phys_legacy_write32,
+};
+
+static uint8_t
+phys_modern_read8(struct virtio_hw *hw __rte_unused, uint8_t *addr)
+{
+	return *(volatile uint8_t *)addr;
+}
+
+static uint16_t
+phys_modern_read16(struct virtio_hw *hw __rte_unused, uint16_t *addr)
+{
+	return *(volatile uint16_t *)addr;
+}
+
+static uint32_t
+phys_modern_read32(struct virtio_hw *hw __rte_unused, uint32_t *addr)
+{
+	return *(volatile uint32_t *)addr;
+}
+
+static void
+phys_modern_write8(struct virtio_hw *hw __rte_unused,
+		uint8_t *addr, uint8_t val)
+{
+	*(volatile uint8_t *)addr = val;
+}
+
+static void
+phys_modern_write16(struct virtio_hw *hw __rte_unused,
+		uint16_t *addr, uint16_t val)
+{
+	*(volatile uint16_t *)addr = val;
+}
+
+static void
+phys_modern_write32(struct virtio_hw *hw __rte_unused,
+		uint32_t *addr, uint32_t val)
+{
+	*(volatile uint32_t *)addr = val;
+}
+
+static const struct virtio_pci_dev_ops phys_modern_dev_ops = {
+	.read8		= phys_modern_read8,
+	.read16		= phys_modern_read16,
+	.read32		= phys_modern_read32,
+	.write8		= phys_modern_write8,
+	.write16	= phys_modern_write16,
+	.write32	= phys_modern_write32,
+};
+
+static int
+vtpci_dev_init(struct rte_eth_dev *dev, struct virtio_hw *hw)
+{
+	if (dev->dev_type == RTE_ETH_DEV_PCI) {
+		if (hw->modern == 1)
+			hw->vtpci_dev_ops = &phys_modern_dev_ops;
+		else
+			hw->vtpci_dev_ops = &phys_legacy_dev_ops;
+		return 0;
+	}
+
+	PMD_DRV_LOG(ERR, "Unkown virtio-net device.");
+	return -1;
+}
+
+static void
+vtpci_dev_uninit(struct rte_eth_dev *dev __rte_unused, struct virtio_hw *hw)
+{
+	hw->vtpci_dev_ops = NULL;
+}
+
+static int
+phys_map_pci_cfg(struct virtio_hw *hw)
+{
+	return rte_eal_pci_map_device(hw->dev);
+}
+
+static void
+phys_unmap_pci_cfg(struct virtio_hw *hw)
+{
+	rte_eal_pci_unmap_device(hw->dev);
+}
+
+static int
+phys_read_pci_cfg(struct virtio_hw *hw, void *buf, size_t len, off_t offset)
+{
+	return rte_eal_pci_read_config(hw->dev, buf, len, offset);
+}
+
+static void *
+phys_get_mapped_addr(struct virtio_hw *hw, uint8_t bar,
+		     uint32_t offset, uint32_t length)
+{
+	uint8_t *base;
+
+	if (bar > 5) {
+		PMD_INIT_LOG(ERR, "invalid bar: %u", bar);
+		return NULL;
+	}
+
+	if (offset + length < offset) {
+		PMD_INIT_LOG(ERR, "offset(%u) + lenght(%u) overflows",
+			offset, length);
+		return NULL;
+	}
+
+	if (offset + length > hw->dev->mem_resource[bar].len) {
+		PMD_INIT_LOG(ERR,
+			"invalid cap: overflows bar space: %u > %"PRIu64,
+			offset + length, hw->dev->mem_resource[bar].len);
+		return NULL;
+	}
+
+	base = hw->dev->mem_resource[bar].addr;
+	if (base == NULL) {
+		PMD_INIT_LOG(ERR, "bar %u base addr is NULL", bar);
+		return NULL;
+	}
+
+	return base + offset;
+}
+
+static const struct virtio_pci_cfg_ops phys_cfg_ops = {
+	.map			= phys_map_pci_cfg,
+	.unmap			= phys_unmap_pci_cfg,
+	.get_mapped_addr	= phys_get_mapped_addr,
+	.read			= phys_read_pci_cfg,
+};
+
+static int
+vtpci_cfg_init(struct rte_eth_dev *dev, struct virtio_hw *hw)
+{
+	if (dev->dev_type == RTE_ETH_DEV_PCI) {
+		hw->vtpci_cfg_ops = &phys_cfg_ops;
+		return 0;
+	}
+
+	PMD_DRV_LOG(ERR, "Unkown virtio-net device.");
+	return -1;
+}
+
+static void
+vtpci_cfg_uninit(struct rte_eth_dev *dev __rte_unused, struct virtio_hw *hw)
+{
+	hw->vtpci_cfg_ops = NULL;
+}
 
 static void
 legacy_read_dev_config(struct virtio_hw *hw, size_t offset,
@@ -80,13 +254,16 @@ legacy_read_dev_config(struct virtio_hw *hw, size_t offset,
 	for (d = dst; length > 0; d += size, off += size, length -= size) {
 		if (length >= 4) {
 			size = 4;
-			*(uint32_t *)d = VIRTIO_READ_REG_4(hw, off);
+			*(uint32_t *)d = hw->vtpci_dev_ops->read32(
+						hw, (uint32_t *)off);
 		} else if (length >= 2) {
 			size = 2;
-			*(uint16_t *)d = VIRTIO_READ_REG_2(hw, off);
+			*(uint16_t *)d = hw->vtpci_dev_ops->read16(
+						hw, (uint16_t *)off);
 		} else {
 			size = 1;
-			*d = VIRTIO_READ_REG_1(hw, off);
+			*d = hw->vtpci_dev_ops->read8(
+						hw, (uint8_t *)off);
 		}
 	}
 }
@@ -103,13 +280,15 @@ legacy_write_dev_config(struct virtio_hw *hw, size_t offset,
 	for (s = src; length > 0; s += size, off += size, length -= size) {
 		if (length >= 4) {
 			size = 4;
-			VIRTIO_WRITE_REG_4(hw, off, *(const uint32_t *)s);
+			hw->vtpci_dev_ops->write32(hw,
+					(uint32_t *)off, *(const uint32_t *)s);
 		} else if (length >= 2) {
 			size = 2;
-			VIRTIO_WRITE_REG_2(hw, off, *(const uint16_t *)s);
+			hw->vtpci_dev_ops->write16(hw,
+					(uint16_t *)off, *(const uint16_t *)s);
 		} else {
 			size = 1;
-			VIRTIO_WRITE_REG_1(hw, off, *s);
+			hw->vtpci_dev_ops->write8(hw, (uint8_t *)off, *s);
 		}
 	}
 }
@@ -117,7 +296,8 @@ legacy_write_dev_config(struct virtio_hw *hw, size_t offset,
 static uint64_t
 legacy_get_features(struct virtio_hw *hw)
 {
-	return VIRTIO_READ_REG_4(hw, VIRTIO_PCI_HOST_FEATURES);
+	return hw->vtpci_dev_ops->read32(hw,
+			(uint32_t *)VIRTIO_PCI_HOST_FEATURES);
 }
 
 static void
@@ -128,19 +308,20 @@ legacy_set_features(struct virtio_hw *hw, uint64_t features)
 			"only 32 bit features are allowed for legacy virtio!");
 		return;
 	}
-	VIRTIO_WRITE_REG_4(hw, VIRTIO_PCI_GUEST_FEATURES, features);
+	hw->vtpci_dev_ops->write32(hw,
+			(uint32_t *)VIRTIO_PCI_GUEST_FEATURES, features);
 }
 
 static uint8_t
 legacy_get_status(struct virtio_hw *hw)
 {
-	return VIRTIO_READ_REG_1(hw, VIRTIO_PCI_STATUS);
+	return hw->vtpci_dev_ops->read8(hw, (uint8_t *)VIRTIO_PCI_STATUS);
 }
 
 static void
 legacy_set_status(struct virtio_hw *hw, uint8_t status)
 {
-	VIRTIO_WRITE_REG_1(hw, VIRTIO_PCI_STATUS, status);
+	hw->vtpci_dev_ops->write8(hw, (uint8_t *)VIRTIO_PCI_STATUS, status);
 }
 
 static void
@@ -152,45 +333,55 @@ legacy_reset(struct virtio_hw *hw)
 static uint8_t
 legacy_get_isr(struct virtio_hw *hw)
 {
-	return VIRTIO_READ_REG_1(hw, VIRTIO_PCI_ISR);
+	return hw->vtpci_dev_ops->read8(hw, (uint8_t *)VIRTIO_PCI_ISR);
 }
 
 /* Enable one vector (0) for Link State Intrerrupt */
 static uint16_t
 legacy_set_config_irq(struct virtio_hw *hw, uint16_t vec)
 {
-	VIRTIO_WRITE_REG_2(hw, VIRTIO_MSI_CONFIG_VECTOR, vec);
-	return VIRTIO_READ_REG_2(hw, VIRTIO_MSI_CONFIG_VECTOR);
+	hw->vtpci_dev_ops->write16(hw,
+			(uint16_t *)VIRTIO_MSI_CONFIG_VECTOR, vec);
+	return hw->vtpci_dev_ops->read16(hw,
+			(uint16_t *)VIRTIO_MSI_CONFIG_VECTOR);
 }
 
 static uint16_t
 legacy_get_queue_num(struct virtio_hw *hw, uint16_t queue_id)
 {
-	VIRTIO_WRITE_REG_2(hw, VIRTIO_PCI_QUEUE_SEL, queue_id);
-	return VIRTIO_READ_REG_2(hw, VIRTIO_PCI_QUEUE_NUM);
+	hw->vtpci_dev_ops->write16(hw,
+			(uint16_t *)VIRTIO_PCI_QUEUE_SEL, queue_id);
+	return hw->vtpci_dev_ops->read16(hw,
+			(uint16_t *)VIRTIO_PCI_QUEUE_NUM);
 }
 
 static void
 legacy_setup_queue(struct virtio_hw *hw, struct virtqueue *vq)
 {
-	VIRTIO_WRITE_REG_2(hw, VIRTIO_PCI_QUEUE_SEL, vq->vq_queue_index);
+	hw->vtpci_dev_ops->write16(hw,
+			(uint16_t *)VIRTIO_PCI_QUEUE_SEL, vq->vq_queue_index);
 
-	VIRTIO_WRITE_REG_4(hw, VIRTIO_PCI_QUEUE_PFN,
-		vq->mz->phys_addr >> VIRTIO_PCI_QUEUE_ADDR_SHIFT);
+	hw->vtpci_dev_ops->write32(hw,
+			(uint32_t *)VIRTIO_PCI_QUEUE_PFN,
+			vq->vq_ring_mem >> VIRTIO_PCI_QUEUE_ADDR_SHIFT);
 }
 
 static void
 legacy_del_queue(struct virtio_hw *hw, struct virtqueue *vq)
 {
-	VIRTIO_WRITE_REG_2(hw, VIRTIO_PCI_QUEUE_SEL, vq->vq_queue_index);
+	hw->vtpci_dev_ops->write16(hw,
+			(uint16_t *)VIRTIO_PCI_QUEUE_SEL, vq->vq_queue_index);
 
-	VIRTIO_WRITE_REG_4(hw, VIRTIO_PCI_QUEUE_PFN, 0);
+	hw->vtpci_dev_ops->write32(hw,
+			(uint32_t *)VIRTIO_PCI_QUEUE_PFN, 0);
 }
 
 static void
 legacy_notify_queue(struct virtio_hw *hw, struct virtqueue *vq)
 {
-	VIRTIO_WRITE_REG_2(hw, VIRTIO_PCI_QUEUE_NOTIFY, vq->vq_queue_index);
+	hw->vtpci_dev_ops->write16(hw,
+			(uint16_t *)VIRTIO_PCI_QUEUE_NOTIFY,
+			vq->vq_queue_index);
 }
 
 #ifdef RTE_EXEC_ENV_LINUXAPP
@@ -470,47 +661,12 @@ static const struct virtio_pci_ops legacy_ops = {
 
 
 
-static inline uint8_t
-io_read8(uint8_t *addr)
-{
-	return *(volatile uint8_t *)addr;
-}
-
-static inline void
-io_write8(uint8_t *addr, uint8_t val)
-{
-	*(volatile uint8_t *)addr = val;
-}
-
-static inline uint16_t
-io_read16(uint16_t *addr)
-{
-	return *(volatile uint16_t *)addr;
-}
-
-static inline void
-io_write16(uint16_t *addr, uint16_t val)
-{
-	*(volatile uint16_t *)addr = val;
-}
-
-static inline uint32_t
-io_read32(uint32_t *addr)
-{
-	return *(volatile uint32_t *)addr;
-}
-
-static inline void
-io_write32(uint32_t *addr, uint32_t val)
-{
-	*(volatile uint32_t *)addr = val;
-}
-
 static inline void
-io_write64_twopart(uint32_t *lo, uint32_t *hi, uint64_t val)
+io_write64_twopart(struct virtio_hw *hw,
+		uint32_t *lo, uint32_t *hi, uint64_t val)
 {
-	io_write32(lo, val & ((1ULL << 32) - 1));
-	io_write32(hi, val >> 32);
+	hw->vtpci_dev_ops->write32(hw, lo, val & ((1ULL << 32) - 1));
+	hw->vtpci_dev_ops->write32(hw, hi, val >> 32);
 }
 
 static void
@@ -522,13 +678,16 @@ modern_read_dev_config(struct virtio_hw *hw, size_t offset,
 	uint8_t old_gen, new_gen;
 
 	do {
-		old_gen = io_read8(&hw->common_cfg->config_generation);
+		old_gen = hw->vtpci_dev_ops->read8(hw,
+				&hw->common_cfg->config_generation);
 
 		p = dst;
 		for (i = 0;  i < length; i++)
-			*p++ = io_read8((uint8_t *)hw->dev_cfg + offset + i);
+			*p++ = hw->vtpci_dev_ops->read8(hw,
+					(uint8_t *)hw->dev_cfg + offset + i);
 
-		new_gen = io_read8(&hw->common_cfg->config_generation);
+		new_gen = hw->vtpci_dev_ops->read8(hw,
+				&hw->common_cfg->config_generation);
 	} while (old_gen != new_gen);
 }
 
@@ -540,7 +699,8 @@ modern_write_dev_config(struct virtio_hw *hw, size_t offset,
 	const uint8_t *p = src;
 
 	for (i = 0;  i < length; i++)
-		io_write8((uint8_t *)hw->dev_cfg + offset + i, *p++);
+		hw->vtpci_dev_ops->write8(hw,
+				(uint8_t *)hw->dev_cfg + offset + i, *p++);
 }
 
 static uint64_t
@@ -548,11 +708,15 @@ modern_get_features(struct virtio_hw *hw)
 {
 	uint32_t features_lo, features_hi;
 
-	io_write32(&hw->common_cfg->device_feature_select, 0);
-	features_lo = io_read32(&hw->common_cfg->device_feature);
+	hw->vtpci_dev_ops->write32(hw,
+			&hw->common_cfg->device_feature_select, 0);
+	features_lo = hw->vtpci_dev_ops->read32(hw,
+			&hw->common_cfg->device_feature);
 
-	io_write32(&hw->common_cfg->device_feature_select, 1);
-	features_hi = io_read32(&hw->common_cfg->device_feature);
+	hw->vtpci_dev_ops->write32(hw,
+			&hw->common_cfg->device_feature_select, 1);
+	features_hi = hw->vtpci_dev_ops->read32(hw,
+			&hw->common_cfg->device_feature);
 
 	return ((uint64_t)features_hi << 32) | features_lo;
 }
@@ -560,25 +724,30 @@ modern_get_features(struct virtio_hw *hw)
 static void
 modern_set_features(struct virtio_hw *hw, uint64_t features)
 {
-	io_write32(&hw->common_cfg->guest_feature_select, 0);
-	io_write32(&hw->common_cfg->guest_feature,
-		   features & ((1ULL << 32) - 1));
+	hw->vtpci_dev_ops->write32(hw,
+			&hw->common_cfg->guest_feature_select, 0);
+	hw->vtpci_dev_ops->write32(hw,
+			&hw->common_cfg->guest_feature,
+			features & ((1ULL << 32) - 1));
 
-	io_write32(&hw->common_cfg->guest_feature_select, 1);
-	io_write32(&hw->common_cfg->guest_feature,
-		   features >> 32);
+	hw->vtpci_dev_ops->write32(hw,
+			&hw->common_cfg->guest_feature_select, 1);
+	hw->vtpci_dev_ops->write32(hw,
+			&hw->common_cfg->guest_feature, features >> 32);
 }
 
 static uint8_t
 modern_get_status(struct virtio_hw *hw)
 {
-	return io_read8(&hw->common_cfg->device_status);
+	return hw->vtpci_dev_ops->read8(hw,
+			&hw->common_cfg->device_status);
 }
 
 static void
 modern_set_status(struct virtio_hw *hw, uint8_t status)
 {
-	io_write8(&hw->common_cfg->device_status, status);
+	hw->vtpci_dev_ops->write8(hw,
+			&hw->common_cfg->device_status, status);
 }
 
 static void
@@ -591,21 +760,25 @@ modern_reset(struct virtio_hw *hw)
 static uint8_t
 modern_get_isr(struct virtio_hw *hw)
 {
-	return io_read8(hw->isr);
+	return hw->vtpci_dev_ops->read8(hw, hw->isr);
 }
 
 static uint16_t
 modern_set_config_irq(struct virtio_hw *hw, uint16_t vec)
 {
-	io_write16(&hw->common_cfg->msix_config, vec);
-	return io_read16(&hw->common_cfg->msix_config);
+	hw->vtpci_dev_ops->write16(hw,
+			&hw->common_cfg->msix_config, vec);
+	return hw->vtpci_dev_ops->read16(hw,
+			&hw->common_cfg->msix_config);
 }
 
 static uint16_t
 modern_get_queue_num(struct virtio_hw *hw, uint16_t queue_id)
 {
-	io_write16(&hw->common_cfg->queue_select, queue_id);
-	return io_read16(&hw->common_cfg->queue_size);
+	hw->vtpci_dev_ops->write16(hw,
+			&hw->common_cfg->queue_select, queue_id);
+	return hw->vtpci_dev_ops->read16(hw,
+			&hw->common_cfg->queue_size);
 }
 
 static void
@@ -620,20 +793,23 @@ modern_setup_queue(struct virtio_hw *hw, struct virtqueue *vq)
 							 ring[vq->vq_nentries]),
 				   VIRTIO_PCI_VRING_ALIGN);
 
-	io_write16(&hw->common_cfg->queue_select, vq->vq_queue_index);
+	hw->vtpci_dev_ops->write16(hw,
+			&hw->common_cfg->queue_select, vq->vq_queue_index);
 
-	io_write64_twopart(&hw->common_cfg->queue_desc_lo,
+	io_write64_twopart(hw, &hw->common_cfg->queue_desc_lo,
 			   &hw->common_cfg->queue_desc_hi, desc_addr);
-	io_write64_twopart(&hw->common_cfg->queue_avail_lo,
+	io_write64_twopart(hw, &hw->common_cfg->queue_avail_lo,
 			   &hw->common_cfg->queue_avail_hi, avail_addr);
-	io_write64_twopart(&hw->common_cfg->queue_used_lo,
+	io_write64_twopart(hw, &hw->common_cfg->queue_used_lo,
 			   &hw->common_cfg->queue_used_hi, used_addr);
 
-	notify_off = io_read16(&hw->common_cfg->queue_notify_off);
+	notify_off = hw->vtpci_dev_ops->read16(hw,
+				&hw->common_cfg->queue_notify_off);
 	vq->notify_addr = (void *)((uint8_t *)hw->notify_base +
 				notify_off * hw->notify_off_multiplier);
 
-	io_write16(&hw->common_cfg->queue_enable, 1);
+	hw->vtpci_dev_ops->write16(hw,
+			&hw->common_cfg->queue_enable, 1);
 
 	PMD_INIT_LOG(DEBUG, "queue %u addresses:", vq->vq_queue_index);
 	PMD_INIT_LOG(DEBUG, "\t desc_addr: %"PRIx64, desc_addr);
@@ -646,22 +822,24 @@ modern_setup_queue(struct virtio_hw *hw, struct virtqueue *vq)
 static void
 modern_del_queue(struct virtio_hw *hw, struct virtqueue *vq)
 {
-	io_write16(&hw->common_cfg->queue_select, vq->vq_queue_index);
+	hw->vtpci_dev_ops->write16(hw,
+			&hw->common_cfg->queue_select, vq->vq_queue_index);
 
-	io_write64_twopart(&hw->common_cfg->queue_desc_lo,
+	io_write64_twopart(hw, &hw->common_cfg->queue_desc_lo,
 			   &hw->common_cfg->queue_desc_hi, 0);
-	io_write64_twopart(&hw->common_cfg->queue_avail_lo,
+	io_write64_twopart(hw, &hw->common_cfg->queue_avail_lo,
 			   &hw->common_cfg->queue_avail_hi, 0);
-	io_write64_twopart(&hw->common_cfg->queue_used_lo,
+	io_write64_twopart(hw, &hw->common_cfg->queue_used_lo,
 			   &hw->common_cfg->queue_used_hi, 0);
 
-	io_write16(&hw->common_cfg->queue_enable, 0);
+	hw->vtpci_dev_ops->write16(hw,
+			&hw->common_cfg->queue_enable, 0);
 }
 
 static void
 modern_notify_queue(struct virtio_hw *hw __rte_unused, struct virtqueue *vq)
 {
-	io_write16(vq->notify_addr, 1);
+	hw->vtpci_dev_ops->write16(hw, vq->notify_addr, 1);
 }
 
 static const struct virtio_pci_ops modern_ops = {
@@ -680,7 +858,6 @@ static const struct virtio_pci_ops modern_ops = {
 	.notify_queue	= modern_notify_queue,
 };
 
-
 void
 vtpci_read_dev_config(struct virtio_hw *hw, size_t offset,
 		      void *dst, int length)
@@ -753,61 +930,26 @@ vtpci_irq_config(struct virtio_hw *hw, uint16_t vec)
 	return hw->vtpci_ops->set_config_irq(hw, vec);
 }
 
-static void *
-get_cfg_addr(struct rte_pci_device *dev, struct virtio_pci_cap *cap)
-{
-	uint8_t  bar    = cap->bar;
-	uint32_t length = cap->length;
-	uint32_t offset = cap->offset;
-	uint8_t *base;
-
-	if (bar > 5) {
-		PMD_INIT_LOG(ERR, "invalid bar: %u", bar);
-		return NULL;
-	}
-
-	if (offset + length < offset) {
-		PMD_INIT_LOG(ERR, "offset(%u) + lenght(%u) overflows",
-			offset, length);
-		return NULL;
-	}
-
-	if (offset + length > dev->mem_resource[bar].len) {
-		PMD_INIT_LOG(ERR,
-			"invalid cap: overflows bar space: %u > %"PRIu64,
-			offset + length, dev->mem_resource[bar].len);
-		return NULL;
-	}
-
-	base = dev->mem_resource[bar].addr;
-	if (base == NULL) {
-		PMD_INIT_LOG(ERR, "bar %u base addr is NULL", bar);
-		return NULL;
-	}
-
-	return base + offset;
-}
-
 static int
-virtio_read_caps(struct rte_pci_device *dev, struct virtio_hw *hw)
+virtio_read_caps(struct virtio_hw *hw)
 {
 	uint8_t pos;
 	struct virtio_pci_cap cap;
 	int ret;
 
-	if (rte_eal_pci_map_device(dev) < 0) {
+	if (hw->vtpci_cfg_ops->map(hw) < 0) {
 		PMD_INIT_LOG(DEBUG, "failed to map pci device!");
 		return -1;
 	}
 
-	ret = rte_eal_pci_read_config(dev, &pos, 1, PCI_CAPABILITY_LIST);
+	ret = hw->vtpci_cfg_ops->read(hw, &pos, 1, PCI_CAPABILITY_LIST);
 	if (ret < 0) {
 		PMD_INIT_LOG(DEBUG, "failed to read pci capability list");
 		return -1;
 	}
 
 	while (pos) {
-		ret = rte_eal_pci_read_config(dev, &cap, sizeof(cap), pos);
+		ret = hw->vtpci_cfg_ops->read(hw, &cap, sizeof(cap), pos);
 		if (ret < 0) {
 			PMD_INIT_LOG(ERR,
 				"failed to read pci cap at pos: %x", pos);
@@ -827,18 +969,25 @@ virtio_read_caps(struct rte_pci_device *dev, struct virtio_hw *hw)
 
 		switch (cap.cfg_type) {
 		case VIRTIO_PCI_CAP_COMMON_CFG:
-			hw->common_cfg = get_cfg_addr(dev, &cap);
+			hw->common_cfg =
+				hw->vtpci_cfg_ops->get_mapped_addr(
+					hw, cap.bar, cap.offset, cap.length);
 			break;
 		case VIRTIO_PCI_CAP_NOTIFY_CFG:
-			rte_eal_pci_read_config(dev, &hw->notify_off_multiplier,
+			hw->vtpci_cfg_ops->read(hw, &hw->notify_off_multiplier,
 						4, pos + sizeof(cap));
-			hw->notify_base = get_cfg_addr(dev, &cap);
+			hw->notify_base =
+				hw->vtpci_cfg_ops->get_mapped_addr(
+					hw, cap.bar, cap.offset, cap.length);
 			break;
 		case VIRTIO_PCI_CAP_DEVICE_CFG:
-			hw->dev_cfg = get_cfg_addr(dev, &cap);
+			hw->dev_cfg =
+				hw->vtpci_cfg_ops->get_mapped_addr(
+					hw, cap.bar, cap.offset, cap.length);
 			break;
 		case VIRTIO_PCI_CAP_ISR_CFG:
-			hw->isr = get_cfg_addr(dev, &cap);
+			hw->isr = hw->vtpci_cfg_ops->get_mapped_addr(
+					hw, cap.bar, cap.offset, cap.length);
 			break;
 		}
 
@@ -863,43 +1012,87 @@ virtio_read_caps(struct rte_pci_device *dev, struct virtio_hw *hw)
 	return 0;
 }
 
+static int
+vtpci_modern_init(struct rte_eth_dev *dev, struct virtio_hw *hw)
+{
+	struct rte_pci_device *pci_dev = dev->pci_dev;
+
+	PMD_INIT_LOG(INFO, "modern virtio pci detected.");
+
+	if (dev->dev_type == RTE_ETH_DEV_PCI)
+		pci_dev->driver->drv_flags |= RTE_PCI_DRV_INTR_LSC;
+
+	hw->vtpci_ops = &modern_ops;
+	hw->modern = 1;
+
+	return 0;
+}
+
+static int
+vtpci_legacy_init(struct rte_eth_dev *dev, struct virtio_hw *hw)
+{
+	struct rte_pci_device *pci_dev = dev->pci_dev;
+
+	PMD_INIT_LOG(INFO, "trying with legacy virtio pci.");
+	if (dev->dev_type == RTE_ETH_DEV_PCI) {
+		if (legacy_virtio_resource_init(pci_dev) < 0)
+			return -1;
+
+		hw->use_msix = legacy_virtio_has_msix(&pci_dev->addr);
+	}
+
+	hw->io_base = (uint32_t)(uintptr_t)
+		hw->vtpci_cfg_ops->get_mapped_addr(hw, 0, 0, 0);
+	hw->vtpci_ops = &legacy_ops;
+	hw->modern = 0;
+
+	return 0;
+}
+
 int
-vtpci_init(struct rte_pci_device *dev, struct virtio_hw *hw)
+vtpci_init(struct rte_eth_dev *eth_dev, struct virtio_hw *hw)
 {
-	hw->dev = dev;
+	struct rte_pci_device *pci_dev = eth_dev->pci_dev;
+	int ret;
+
+	hw->dev = pci_dev;
+
+	if ((eth_dev->dev_type == RTE_ETH_DEV_PCI) && (pci_dev == NULL)) {
+		PMD_INIT_LOG(INFO, "No pci device specified.");
+		return -1;
+	}
+
+	if (vtpci_cfg_init(eth_dev, hw) < 0)
+		return -1;
 
 	/*
 	 * Try if we can succeed reading virtio pci caps, which exists
 	 * only on modern pci device. If failed, we fallback to legacy
 	 * virtio handling.
 	 */
-	if (virtio_read_caps(dev, hw) == 0) {
-		PMD_INIT_LOG(INFO, "modern virtio pci detected.");
-		hw->vtpci_ops = &modern_ops;
-		hw->modern    = 1;
-		dev->driver->drv_flags |= RTE_PCI_DRV_INTR_LSC;
-		return 0;
-	}
+	if (virtio_read_caps(hw) == 0)
+		ret = vtpci_modern_init(eth_dev, hw);
+	else
+		ret = vtpci_legacy_init(eth_dev, hw);
 
-	PMD_INIT_LOG(INFO, "trying with legacy virtio pci.");
-	if (legacy_virtio_resource_init(dev) < 0)
+	if (ret < 0)
 		return -1;
 
-	hw->vtpci_ops = &legacy_ops;
-	hw->use_msix = legacy_virtio_has_msix(&dev->addr);
-	hw->io_base  = (uint32_t)(uintptr_t)dev->mem_resource[0].addr;
-	hw->modern   = 0;
+	if (vtpci_dev_init(eth_dev, hw) < 0)
+		return -1;
 
 	return 0;
 }
 
 void
-vtpci_uninit(struct rte_pci_device *dev, struct virtio_hw *hw)
+vtpci_uninit(struct rte_eth_dev *eth_dev, struct virtio_hw *hw)
 {
 	hw->dev  = NULL;
 	hw->vtpci_ops = NULL;
 	hw->use_msix = 0;
 	hw->io_base  = 0;
 	hw->modern   = 0;
-	rte_eal_pci_unmap_device(dev);
+	hw->vtpci_cfg_ops->unmap(hw);
+	vtpci_dev_uninit(eth_dev, hw);
+	vtpci_cfg_uninit(eth_dev, hw);
 }
diff --git a/drivers/net/virtio/virtio_pci.h b/drivers/net/virtio/virtio_pci.h
index 17c7972..7b5ad54 100644
--- a/drivers/net/virtio/virtio_pci.h
+++ b/drivers/net/virtio/virtio_pci.h
@@ -222,6 +222,24 @@ struct virtio_pci_common_cfg {
 
 struct virtio_hw;
 
+/* Functions to access pci configuration space */
+struct virtio_pci_cfg_ops {
+	int (*map)(struct virtio_hw *hw);
+	void (*unmap)(struct virtio_hw *hw);
+	void *(*get_mapped_addr)(struct virtio_hw *hw, uint8_t bar, uint32_t offset, uint32_t length);
+	int (*read)(struct virtio_hw *hw, void *buf, size_t len, off_t offset);
+};
+
+/* Functions to access pci device registers */
+struct virtio_pci_dev_ops {
+	uint8_t (*read8)(struct virtio_hw *hw, uint8_t *addr);
+	uint16_t (*read16)(struct virtio_hw *hw, uint16_t *addr);
+	uint32_t (*read32)(struct virtio_hw *hw, uint32_t *addr);
+	void (*write8)(struct virtio_hw *hw, uint8_t *addr, uint8_t val);
+	void (*write16)(struct virtio_hw *hw, uint16_t *addr, uint16_t val);
+	void (*write32)(struct virtio_hw *hw, uint32_t *addr, uint32_t val);
+};
+
 struct virtio_pci_ops {
 	void (*read_dev_cfg)(struct virtio_hw *hw, size_t offset,
 			     void *dst, int len);
@@ -266,6 +284,8 @@ struct virtio_hw {
 	struct virtio_pci_common_cfg *common_cfg;
 	struct virtio_net_config *dev_cfg;
 	const struct virtio_pci_ops *vtpci_ops;
+	const struct virtio_pci_cfg_ops *vtpci_cfg_ops;
+	const struct virtio_pci_dev_ops *vtpci_dev_ops;
 };
 
 /*
@@ -327,8 +347,8 @@ vtpci_with_feature(struct virtio_hw *hw, uint64_t bit)
 /*
  * Function declaration from virtio_pci.c
  */
-int vtpci_init(struct rte_pci_device *, struct virtio_hw *);
-void vtpci_uninit(struct rte_pci_device *dev, struct virtio_hw *);
+int vtpci_init(struct rte_eth_dev *, struct virtio_hw *);
+void vtpci_uninit(struct rte_eth_dev *, struct virtio_hw *);
 void vtpci_reset(struct virtio_hw *);
 
 void vtpci_reinit_complete(struct virtio_hw *);
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 120+ messages in thread

* Re: [RFC PATCH 5/5] virtio: Extend virtio-net PMD to support container environment
  2016-01-28  2:47       ` Tetsuya Mukawa
@ 2016-01-28  9:48         ` Xie, Huawei
  2016-01-28  9:53           ` Tetsuya Mukawa
  0 siblings, 1 reply; 120+ messages in thread
From: Xie, Huawei @ 2016-01-28  9:48 UTC (permalink / raw)
  To: Tetsuya Mukawa, dev, yuanhan.liu, Tan, Jianfeng

On 1/28/2016 10:47 AM, Tetsuya Mukawa wrote:
> On 2016/01/28 0:58, Xie, Huawei wrote:
>> On 1/21/2016 7:09 PM, Tetsuya Mukawa wrote:
>> [snip]
>>> +
>>> +static int
>>> +qtest_raw_recv(int fd, char *buf, size_t count)
>>> +{
>>> +	size_t len = count;
>>> +	size_t total_len = 0;
>>> +	int ret = 0;
>>> +
>>> +	while (len > 0) {
>>> +		ret = read(fd, buf, len);
>>> +		if (ret == (int)len)
>>> +			break;
>>> +		if (*(buf + ret - 1) == '\n')
>>> +			break;
>> The above two lines should be put after the below if block.
> Yes, it should be so.
>
>>> +		if (ret == -1) {
>>> +			if (errno == EINTR)
>>> +				continue;
>>> +			return ret;
>>> +		}
>>> +		total_len += ret;
>>> +		buf += ret;
>>> +		len -= ret;
>>> +	}
>>> +	return total_len + ret;
>>> +}
>>> +
>> [snip]
>>
>>> +
>>> +static void
>>> +qtest_handle_one_message(struct qtest_session *s, char *buf)
>>> +{
>>> +	int ret;
>>> +
>>> +	if (strncmp(buf, interrupt_message, strlen(interrupt_message)) == 0) {
>>> +		if (rte_atomic16_read(&s->enable_intr) == 0)
>>> +			return;
>>> +
>>> +		/* relay interrupt to pipe */
>>> +		ret = write(s->irqfds.writefd, "1", 1);
>>> +		if (ret < 0)
>>> +			rte_panic("cannot relay interrupt\n");
>>> +	} else {
>>> +		/* relay normal message to pipe */
>>> +		ret = qtest_raw_send(s->msgfds.writefd, buf, strlen(buf));
>>> +		if (ret < 0)
>>> +			rte_panic("cannot relay normal message\n");
>>> +	}
>>> +}
>>> +
>>> +static char *
>>> +qtest_get_next_message(char *p)
>>> +{
>>> +	p = strchr(p, '\n');
>>> +	if ((p == NULL) || (*(p + 1) == '\0'))
>>> +		return NULL;
>>> +	return p + 1;
>>> +}
>>> +
>>> +static void
>>> +qtest_close_one_socket(int *fd)
>>> +{
>>> +	if (*fd > 0) {
>>> +		close(*fd);
>>> +		*fd = -1;
>>> +	}
>>> +}
>>> +
>>> +static void
>>> +qtest_close_sockets(struct qtest_session *s)
>>> +{
>>> +	qtest_close_one_socket(&s->qtest_socket);
>>> +	qtest_close_one_socket(&s->msgfds.readfd);
>>> +	qtest_close_one_socket(&s->msgfds.writefd);
>>> +	qtest_close_one_socket(&s->irqfds.readfd);
>>> +	qtest_close_one_socket(&s->irqfds.writefd);
>>> +	qtest_close_one_socket(&s->ivshmem_socket);
>>> +}
>>> +
>>> +/*
>>> + * This thread relays QTest response using pipe.
>>> + * The function is needed because we need to separate IRQ message from others.
>>> + */
>>> +static void *
>>> +qtest_event_handler(void *data) {
>>> +	struct qtest_session *s = (struct qtest_session *)data;
>>> +	char buf[1024];
>>> +	char *p;
>>> +	int ret;
>>> +
>>> +	for (;;) {
>>> +		memset(buf, 0, sizeof(buf));
>>> +		ret = qtest_raw_recv(s->qtest_socket, buf, sizeof(buf));
>>> +		if (ret < 0) {
>>> +			qtest_close_sockets(s);
>>> +			return NULL;
>>> +		}
>>> +
>>> +		/* may receive multiple messages at the same time */
>> From the qtest_raw_recv implementation, if at some point one message is
>> received by two qtest_raw_recv calls, then is that message discarded?
>> We could save the last incomplete message in buffer, and combine the
>> message received next time together.
> I guess we don't lose replies from QEMU.
> Please let me describe more.
>
> According to the qtest specification, after sending a message, we need
> to receive a reply like below.
> APP: ---command---> QEMU
> APP: <-----------OK---- QEMU
>
> But, to handle interrupt message, we need to take care below case.
> APP: ---command---> QEMU
> APP: <---interrupt---- QEMU
> APP: <-----------OK---- QEMU
>
> Also, we need to handle a case like multiple threads tries to send a
> qtest message.
> Anyway, here is current implementation.
>
> So far, we have 3 types of sockets.
> 1. socket for qtest messaging.
> 2. socket for relaying normal message.
> 3. socket for relaying interrupt message.
>
> About read direction:
> The qtest socket is only read by "qtest_event_handler". The handler may
> receive multiple messages at once.

I think there are two assumptions that all messages are ended with "\n"
and the sizeof(buf) could hold the maximum length of sum of all multiple
messages that QEMU could send at one time.
Otherwise in the last read call of qtest_raw_receive, you might receive
only part of the a message.

> In the case,  the handler split messages, and send it to normal message
> socket or interrupt message socket.
>
> About write direction:
> The qtest socket will be written by below functions.
>  - qtest_raw_in/out
>  - qtest_raw_read/write
> But all functions that use above functions need to have mutex before
> sending messages.
> So all messaging will not be overlapped, then only one thread will read
> the socket for relaying normal message.
>
> Tetsuya
>


^ permalink raw reply	[flat|nested] 120+ messages in thread

* Re: [RFC PATCH 5/5] virtio: Extend virtio-net PMD to support container environment
  2016-01-28  9:48         ` Xie, Huawei
@ 2016-01-28  9:53           ` Tetsuya Mukawa
  0 siblings, 0 replies; 120+ messages in thread
From: Tetsuya Mukawa @ 2016-01-28  9:53 UTC (permalink / raw)
  To: Xie, Huawei, dev, yuanhan.liu, Tan, Jianfeng

On 2016/01/28 18:48, Xie, Huawei wrote:
> On 1/28/2016 10:47 AM, Tetsuya Mukawa wrote:
>> On 2016/01/28 0:58, Xie, Huawei wrote:
>>> On 1/21/2016 7:09 PM, Tetsuya Mukawa wrote:
>>> [snip]
>>>> +
>>>> +static int
>>>> +qtest_raw_recv(int fd, char *buf, size_t count)
>>>> +{
>>>> +	size_t len = count;
>>>> +	size_t total_len = 0;
>>>> +	int ret = 0;
>>>> +
>>>> +	while (len > 0) {
>>>> +		ret = read(fd, buf, len);
>>>> +		if (ret == (int)len)
>>>> +			break;
>>>> +		if (*(buf + ret - 1) == '\n')
>>>> +			break;
>>> The above two lines should be put after the below if block.
>> Yes, it should be so.
>>
>>>> +		if (ret == -1) {
>>>> +			if (errno == EINTR)
>>>> +				continue;
>>>> +			return ret;
>>>> +		}
>>>> +		total_len += ret;
>>>> +		buf += ret;
>>>> +		len -= ret;
>>>> +	}
>>>> +	return total_len + ret;
>>>> +}
>>>> +
>>> [snip]
>>>
>>>> +
>>>> +static void
>>>> +qtest_handle_one_message(struct qtest_session *s, char *buf)
>>>> +{
>>>> +	int ret;
>>>> +
>>>> +	if (strncmp(buf, interrupt_message, strlen(interrupt_message)) == 0) {
>>>> +		if (rte_atomic16_read(&s->enable_intr) == 0)
>>>> +			return;
>>>> +
>>>> +		/* relay interrupt to pipe */
>>>> +		ret = write(s->irqfds.writefd, "1", 1);
>>>> +		if (ret < 0)
>>>> +			rte_panic("cannot relay interrupt\n");
>>>> +	} else {
>>>> +		/* relay normal message to pipe */
>>>> +		ret = qtest_raw_send(s->msgfds.writefd, buf, strlen(buf));
>>>> +		if (ret < 0)
>>>> +			rte_panic("cannot relay normal message\n");
>>>> +	}
>>>> +}
>>>> +
>>>> +static char *
>>>> +qtest_get_next_message(char *p)
>>>> +{
>>>> +	p = strchr(p, '\n');
>>>> +	if ((p == NULL) || (*(p + 1) == '\0'))
>>>> +		return NULL;
>>>> +	return p + 1;
>>>> +}
>>>> +
>>>> +static void
>>>> +qtest_close_one_socket(int *fd)
>>>> +{
>>>> +	if (*fd > 0) {
>>>> +		close(*fd);
>>>> +		*fd = -1;
>>>> +	}
>>>> +}
>>>> +
>>>> +static void
>>>> +qtest_close_sockets(struct qtest_session *s)
>>>> +{
>>>> +	qtest_close_one_socket(&s->qtest_socket);
>>>> +	qtest_close_one_socket(&s->msgfds.readfd);
>>>> +	qtest_close_one_socket(&s->msgfds.writefd);
>>>> +	qtest_close_one_socket(&s->irqfds.readfd);
>>>> +	qtest_close_one_socket(&s->irqfds.writefd);
>>>> +	qtest_close_one_socket(&s->ivshmem_socket);
>>>> +}
>>>> +
>>>> +/*
>>>> + * This thread relays QTest response using pipe.
>>>> + * The function is needed because we need to separate IRQ message from others.
>>>> + */
>>>> +static void *
>>>> +qtest_event_handler(void *data) {
>>>> +	struct qtest_session *s = (struct qtest_session *)data;
>>>> +	char buf[1024];
>>>> +	char *p;
>>>> +	int ret;
>>>> +
>>>> +	for (;;) {
>>>> +		memset(buf, 0, sizeof(buf));
>>>> +		ret = qtest_raw_recv(s->qtest_socket, buf, sizeof(buf));
>>>> +		if (ret < 0) {
>>>> +			qtest_close_sockets(s);
>>>> +			return NULL;
>>>> +		}
>>>> +
>>>> +		/* may receive multiple messages at the same time */
>>> From the qtest_raw_recv implementation, if at some point one message is
>>> received by two qtest_raw_recv calls, then is that message discarded?
>>> We could save the last incomplete message in buffer, and combine the
>>> message received next time together.
>> I guess we don't lose replies from QEMU.
>> Please let me describe more.
>>
>> According to the qtest specification, after sending a message, we need
>> to receive a reply like below.
>> APP: ---command---> QEMU
>> APP: <-----------OK---- QEMU
>>
>> But, to handle interrupt message, we need to take care below case.
>> APP: ---command---> QEMU
>> APP: <---interrupt---- QEMU
>> APP: <-----------OK---- QEMU
>>
>> Also, we need to handle a case like multiple threads tries to send a
>> qtest message.
>> Anyway, here is current implementation.
>>
>> So far, we have 3 types of sockets.
>> 1. socket for qtest messaging.
>> 2. socket for relaying normal message.
>> 3. socket for relaying interrupt message.
>>
>> About read direction:
>> The qtest socket is only read by "qtest_event_handler". The handler may
>> receive multiple messages at once.
> I think there are two assumptions that all messages are ended with "\n"
> and the sizeof(buf) could hold the maximum length of sum of all multiple
> messages that QEMU could send at one time.
> Otherwise in the last read call of qtest_raw_receive, you might receive
> only part of the a message.

I've got your point. I will fix above case.

Thanks,
Tetsuya

^ permalink raw reply	[flat|nested] 120+ messages in thread

* Re: [RFC PATCH 5/5] virtio: Extend virtio-net PMD to support container environment
  2016-01-28  2:44       ` Tetsuya Mukawa
@ 2016-01-29  8:56         ` Xie, Huawei
  0 siblings, 0 replies; 120+ messages in thread
From: Xie, Huawei @ 2016-01-29  8:56 UTC (permalink / raw)
  To: Tetsuya Mukawa, dev, yuanhan.liu, Tan, Jianfeng

On 1/28/2016 10:44 AM, Tetsuya Mukawa wrote:
> On 2016/01/27 19:03, Xie, Huawei wrote:
>> On 1/21/2016 7:09 PM, Tetsuya Mukawa wrote:
>>> +	/* Set BAR region */
>>> +	for (i = 0; i < NB_BAR; i++) {
>>> +		switch (dev->bar[i].type) {
>>> +		case QTEST_PCI_BAR_IO:
>>> +		case QTEST_PCI_BAR_MEMORY_UNDER_1MB:
>>> +		case QTEST_PCI_BAR_MEMORY_32:
>>> +			qtest_pci_outl(s, bus, device, 0, dev->bar[i].addr,
>>> +				dev->bar[i].region_start);
>>> +			PMD_DRV_LOG(INFO, "Set BAR of %s device: 0x%lx - 0x%lx\n",
>>> +				dev->name, dev->bar[i].region_start,
>>> +				dev->bar[i].region_start + dev->bar[i].region_size);
>>> +			break;
>>> +		case QTEST_PCI_BAR_MEMORY_64:
>>> +			qtest_pci_outq(s, bus, device, 0, dev->bar[i].addr,
>>> +				dev->bar[i].region_start);
>>> +			PMD_DRV_LOG(INFO, "Set BAR of %s device: 0x%lx - 0x%lx\n",
>>> +				dev->name, dev->bar[i].region_start,
>>> +				dev->bar[i].region_start + dev->bar[i].region_size);
>>> +			break;
>> Hasn't the bar resource already been allocated? Is it the app's
>> responsibility to allocate the bar resource in qtest mode? The app
>> couldn't have that knowledge.
> Yes. In qtest mode, the app should register above values.
> (Without it, default values are 0)
> Usually, this will be done by BIOS or uEFI. But in qtest mode, these
> will not be invoked.
> So we need to define above values, and also need to enable PCI devices.
>
> In this release, I just register hard coded values except for one of
> ivshmem BAR.
> In next release, I will describe memory map in comment.

I think ideally this app should do the whole PCI system
initialization(and also the north/south bridge) using the DFS algorithm
on behalf of the BIOS, to allocate resource for all bridge and devices.
Otherwise if QEMU follows the hardware platform's behavior to route
MMIO/IO access, if we only allocate resources for part of the devices,
the transactions could not be routed correctly.
Anyway we shouldn't bother us. It is OK as long as it works.


>
> Tetsuya
>
>


^ permalink raw reply	[flat|nested] 120+ messages in thread

* Re: [RFC PATCH 5/5] virtio: Extend virtio-net PMD to support container environment
  2016-01-21 11:07   ` [RFC PATCH 5/5] virtio: Extend virtio-net PMD to support container environment Tetsuya Mukawa
                       ` (5 preceding siblings ...)
  2016-01-27 16:45     ` Xie, Huawei
@ 2016-01-29  8:57     ` Yuanhan Liu
  2016-01-29  9:13       ` Yuanhan Liu
  2016-02-10  3:40     ` [PATCH v2 0/5] Virtio-net PMD: QEMU QTest extension for container Tetsuya Mukawa
                       ` (5 subsequent siblings)
  12 siblings, 1 reply; 120+ messages in thread
From: Yuanhan Liu @ 2016-01-29  8:57 UTC (permalink / raw)
  To: Tetsuya Mukawa; +Cc: dev

On Thu, Jan 21, 2016 at 08:07:58PM +0900, Tetsuya Mukawa wrote:
> +static int
> +virt_read_pci_cfg(struct virtio_hw *hw, void *buf, size_t len, off_t offset)
> +{
> +	qtest_read_pci_cfg(hw, "virtio-net", buf, len, offset);
> +	return 0;
> +}
> +
> +static void *
> +virt_get_mapped_addr(struct virtio_hw *hw, uint8_t bar,
> +		     uint32_t offset, uint32_t length)
> +{
> +	uint64_t base;
> +	uint64_t size;
> +
> +	if (qtest_get_bar_size(hw, "virtio-net", bar, &size) < 0) {
> +		PMD_INIT_LOG(ERR, "invalid bar: %u", bar);
> +		return NULL;
> +	}
> +
> +	if (offset + length < offset) {
> +		PMD_INIT_LOG(ERR, "offset(%u) + lenght(%u) overflows",
> +			offset, length);
> +		return NULL;
> +	}
> +
> +	if (offset + length > size) {
> +		PMD_INIT_LOG(ERR,
> +			"invalid cap: overflows bar space: %u > %"PRIu64,
> +			offset + length, size);
> +		return NULL;
> +	}
> +
> +	if (qtest_get_bar_addr(hw, "virtio-net", bar, &base) < 0) {
> +		PMD_INIT_LOG(ERR, "invalid bar: %u", bar);
> +		return NULL;
> +	}

So, I understood the usage now, and the cfg_ops abstraction doesn't look
good yet necessary to me.  For EAL managed pci device, bar length and
addr are stored at memory_resources[], and for your case, it's from the
qtest. And judging that it's compile time decision, I'd like it to be:

    #ifdef /* RTE_LIBRTE_VIRTIO_HOST_MODE */
    
    static uint32_t
    get_bar_size(...)
    {
    	return qtest_get_bar_size(..);
    }
    
    static uint64-t
    get_bar_addr(...)
    {
    	return qtest_get_bar_addr(..);
    }
    
    ...
    ...
    
    #else
    
    static  uint32_t
    get_bar_size(...)
    {
    	return dev->mem_resource[bar].addr;
    }
    
    ...
    
    }
    #endif


And then you just need do related changes at virtio_read_caps() and
get_cfg_addr(). That'd be much simpler, without introducing duplicate
code and uncessary complex.

What do you think of that?

	--yliu

> +
> +	return (void *)(base + offset);
> +}
> +
> +static const struct virtio_pci_cfg_ops virt_cfg_ops = {
> +	.map			= virt_map_pci_cfg,
> +	.unmap			= virt_unmap_pci_cfg,
> +	.get_mapped_addr	= virt_get_mapped_addr,
> +	.read			= virt_read_pci_cfg,
> +};
> +#endif /* RTE_LIBRTE_VIRTIO_HOST_MODE */

^ permalink raw reply	[flat|nested] 120+ messages in thread

* Re: [RFC PATCH 5/5] virtio: Extend virtio-net PMD to support container environment
  2016-01-29  8:57     ` Yuanhan Liu
@ 2016-01-29  9:13       ` Yuanhan Liu
  2016-02-01  1:49         ` Tetsuya Mukawa
  0 siblings, 1 reply; 120+ messages in thread
From: Yuanhan Liu @ 2016-01-29  9:13 UTC (permalink / raw)
  To: Tetsuya Mukawa; +Cc: dev

On Fri, Jan 29, 2016 at 04:57:23PM +0800, Yuanhan Liu wrote:
> On Thu, Jan 21, 2016 at 08:07:58PM +0900, Tetsuya Mukawa wrote:
> > +static int
> > +virt_read_pci_cfg(struct virtio_hw *hw, void *buf, size_t len, off_t offset)
> > +{
> > +	qtest_read_pci_cfg(hw, "virtio-net", buf, len, offset);
> > +	return 0;
> > +}
> > +
> > +static void *
> > +virt_get_mapped_addr(struct virtio_hw *hw, uint8_t bar,
> > +		     uint32_t offset, uint32_t length)
> > +{
> > +	uint64_t base;
> > +	uint64_t size;
> > +
> > +	if (qtest_get_bar_size(hw, "virtio-net", bar, &size) < 0) {
> > +		PMD_INIT_LOG(ERR, "invalid bar: %u", bar);
> > +		return NULL;
> > +	}
> > +
> > +	if (offset + length < offset) {
> > +		PMD_INIT_LOG(ERR, "offset(%u) + lenght(%u) overflows",
> > +			offset, length);
> > +		return NULL;
> > +	}
> > +
> > +	if (offset + length > size) {
> > +		PMD_INIT_LOG(ERR,
> > +			"invalid cap: overflows bar space: %u > %"PRIu64,
> > +			offset + length, size);
> > +		return NULL;
> > +	}
> > +
> > +	if (qtest_get_bar_addr(hw, "virtio-net", bar, &base) < 0) {
> > +		PMD_INIT_LOG(ERR, "invalid bar: %u", bar);
> > +		return NULL;
> > +	}
> 
> So, I understood the usage now, and the cfg_ops abstraction doesn't look
> good yet necessary to me.  For EAL managed pci device, bar length and
> addr are stored at memory_resources[], and for your case, it's from the
> qtest. And judging that it's compile time decision, I'd like it to be:
> 
>     #ifdef /* RTE_LIBRTE_VIRTIO_HOST_MODE */

Oops, sorry, I was wrong. Your code could be co-exist with the
traditional virtio pmd driver, thus we can't do that.

But still, I think dynamic "if ... else ..." should be better:
there are just few places (maybe 4: bar_size, bar length, map
device, read config) need that.


On the other hand, if you really want to do that abstraction,
you should go it with more fine granularity, such as the following
methods I proposed, instead of the big one: get_cfg_addr(). In
that way, we could avoid duplicate code.

	--yliu

>     
>     static uint32_t
>     get_bar_size(...)
>     {
>     	return qtest_get_bar_size(..);
>     }
>     
>     static uint64-t
>     get_bar_addr(...)
>     {
>     	return qtest_get_bar_addr(..);
>     }
>     
>     ...
>     ...
>     
>     #else
>     
>     static  uint32_t
>     get_bar_size(...)
>     {
>     	return dev->mem_resource[bar].addr;
>     }
>     
>     ...
>     
>     }
>     #endif
> 
> 
> And then you just need do related changes at virtio_read_caps() and
> get_cfg_addr(). That'd be much simpler, without introducing duplicate
> code and uncessary complex.
> 
> What do you think of that?
> 
> 	--yliu
> 
> > +
> > +	return (void *)(base + offset);
> > +}
> > +
> > +static const struct virtio_pci_cfg_ops virt_cfg_ops = {
> > +	.map			= virt_map_pci_cfg,
> > +	.unmap			= virt_unmap_pci_cfg,
> > +	.get_mapped_addr	= virt_get_mapped_addr,
> > +	.read			= virt_read_pci_cfg,
> > +};
> > +#endif /* RTE_LIBRTE_VIRTIO_HOST_MODE */

^ permalink raw reply	[flat|nested] 120+ messages in thread

* Re: [PATCH v2 3/3] virtio: Add a new layer to abstract pci access method
  2016-01-28  9:33   ` [PATCH v2 3/3] virtio: Add a new layer to abstract pci access method Tetsuya Mukawa
@ 2016-01-29  9:17     ` Yuanhan Liu
  2016-02-01  1:50       ` Tetsuya Mukawa
  0 siblings, 1 reply; 120+ messages in thread
From: Yuanhan Liu @ 2016-01-29  9:17 UTC (permalink / raw)
  To: Tetsuya Mukawa; +Cc: dev

On Thu, Jan 28, 2016 at 06:33:32PM +0900, Tetsuya Mukawa wrote:
> This patch addss function pointers to abstract pci access method.
> This abstraction layer will be used when virtio-net PMD supports
> container extension.
> 
> The below functions abstract how to access to pci configuration space.
> 
> struct virtio_pci_cfg_ops {
>         int   (*map)(...);
>         void  (*unmap)(...);
>         void *(*get_mapped_addr)(...);
>         int   (*read)(...);
> };
> 
> The pci configuration space has information how to access to virtio
> device registers. Basically, there are 2 ways to acccess to the
> registers. One is using portio and the other is using mapped memory.
> The below functions abstract this access method.

One question: is there a way to map PCI memory with Qtest? I'm thinking
if we can keep the io_read/write() for Qtest as well, if so, code could
be simplified, a lot, IMO.

	--yliu

^ permalink raw reply	[flat|nested] 120+ messages in thread

* Re: [RFC PATCH 5/5] virtio: Extend virtio-net PMD to support container environment
  2016-01-29  9:13       ` Yuanhan Liu
@ 2016-02-01  1:49         ` Tetsuya Mukawa
  0 siblings, 0 replies; 120+ messages in thread
From: Tetsuya Mukawa @ 2016-02-01  1:49 UTC (permalink / raw)
  To: Yuanhan Liu; +Cc: dev

On 2016/01/29 18:13, Yuanhan Liu wrote:
> On Fri, Jan 29, 2016 at 04:57:23PM +0800, Yuanhan Liu wrote:
>> On Thu, Jan 21, 2016 at 08:07:58PM +0900, Tetsuya Mukawa wrote:
>>> +static int
>>> +virt_read_pci_cfg(struct virtio_hw *hw, void *buf, size_t len, off_t offset)
>>> +{
>>> +	qtest_read_pci_cfg(hw, "virtio-net", buf, len, offset);
>>> +	return 0;
>>> +}
>>> +
>>> +static void *
>>> +virt_get_mapped_addr(struct virtio_hw *hw, uint8_t bar,
>>> +		     uint32_t offset, uint32_t length)
>>> +{
>>> +	uint64_t base;
>>> +	uint64_t size;
>>> +
>>> +	if (qtest_get_bar_size(hw, "virtio-net", bar, &size) < 0) {
>>> +		PMD_INIT_LOG(ERR, "invalid bar: %u", bar);
>>> +		return NULL;
>>> +	}
>>> +
>>> +	if (offset + length < offset) {
>>> +		PMD_INIT_LOG(ERR, "offset(%u) + lenght(%u) overflows",
>>> +			offset, length);
>>> +		return NULL;
>>> +	}
>>> +
>>> +	if (offset + length > size) {
>>> +		PMD_INIT_LOG(ERR,
>>> +			"invalid cap: overflows bar space: %u > %"PRIu64,
>>> +			offset + length, size);
>>> +		return NULL;
>>> +	}
>>> +
>>> +	if (qtest_get_bar_addr(hw, "virtio-net", bar, &base) < 0) {
>>> +		PMD_INIT_LOG(ERR, "invalid bar: %u", bar);
>>> +		return NULL;
>>> +	}
>> So, I understood the usage now, and the cfg_ops abstraction doesn't look
>> good yet necessary to me.  For EAL managed pci device, bar length and
>> addr are stored at memory_resources[], and for your case, it's from the
>> qtest. And judging that it's compile time decision, I'd like it to be:
>>
>>     #ifdef /* RTE_LIBRTE_VIRTIO_HOST_MODE */
> Oops, sorry, I was wrong. Your code could be co-exist with the
> traditional virtio pmd driver, thus we can't do that.
>
> But still, I think dynamic "if ... else ..." should be better:
> there are just few places (maybe 4: bar_size, bar length, map
> device, read config) need that.

Thanks for comments.
I will use "if ... else ...." instead of introducing a cfg_ops.

Tetsuya


>
> On the other hand, if you really want to do that abstraction,
> you should go it with more fine granularity, such as the following
> methods I proposed, instead of the big one: get_cfg_addr(). In
> that way, we could avoid duplicate code.
>
> 	--yliu
>
>>     
>>     static uint32_t
>>     get_bar_size(...)
>>     {
>>     	return qtest_get_bar_size(..);
>>     }
>>     
>>     static uint64-t
>>     get_bar_addr(...)
>>     {
>>     	return qtest_get_bar_addr(..);
>>     }
>>     
>>     ...
>>     ...
>>     
>>     #else
>>     
>>     static  uint32_t
>>     get_bar_size(...)
>>     {
>>     	return dev->mem_resource[bar].addr;
>>     }
>>     
>>     ...
>>     
>>     }
>>     #endif
>>
>>
>> And then you just need do related changes at virtio_read_caps() and
>> get_cfg_addr(). That'd be much simpler, without introducing duplicate
>> code and uncessary complex.
>>
>> What do you think of that?
>>
>> 	--yliu
>>
>>> +
>>> +	return (void *)(base + offset);
>>> +}
>>> +
>>> +static const struct virtio_pci_cfg_ops virt_cfg_ops = {
>>> +	.map			= virt_map_pci_cfg,
>>> +	.unmap			= virt_unmap_pci_cfg,
>>> +	.get_mapped_addr	= virt_get_mapped_addr,
>>> +	.read			= virt_read_pci_cfg,
>>> +};
>>> +#endif /* RTE_LIBRTE_VIRTIO_HOST_MODE */

^ permalink raw reply	[flat|nested] 120+ messages in thread

* Re: [PATCH v2 3/3] virtio: Add a new layer to abstract pci access method
  2016-01-29  9:17     ` Yuanhan Liu
@ 2016-02-01  1:50       ` Tetsuya Mukawa
  2016-02-01 13:15         ` Yuanhan Liu
  0 siblings, 1 reply; 120+ messages in thread
From: Tetsuya Mukawa @ 2016-02-01  1:50 UTC (permalink / raw)
  To: Yuanhan Liu; +Cc: dev

On 2016/01/29 18:17, Yuanhan Liu wrote:
> On Thu, Jan 28, 2016 at 06:33:32PM +0900, Tetsuya Mukawa wrote:
>> This patch addss function pointers to abstract pci access method.
>> This abstraction layer will be used when virtio-net PMD supports
>> container extension.
>>
>> The below functions abstract how to access to pci configuration space.
>>
>> struct virtio_pci_cfg_ops {
>>         int   (*map)(...);
>>         void  (*unmap)(...);
>>         void *(*get_mapped_addr)(...);
>>         int   (*read)(...);
>> };
>>
>> The pci configuration space has information how to access to virtio
>> device registers. Basically, there are 2 ways to acccess to the
>> registers. One is using portio and the other is using mapped memory.
>> The below functions abstract this access method.
> One question: is there a way to map PCI memory with Qtest? I'm thinking
> if we can keep the io_read/write() for Qtest as well, if so, code could
> be simplified, a lot, IMO.
>

Yes, I agree with you.
But AFAIK, we don't have a way to mmap it from DPDK application.

We may be able to map PCI configuration space to a memory address space
that guest CPU can handle.
But even in this case, I guess we cannot access the memory without qtest
messaging.

Thanks,
Tetsuya

^ permalink raw reply	[flat|nested] 120+ messages in thread

* Re: [PATCH v2 3/3] virtio: Add a new layer to abstract pci access method
  2016-02-01  1:50       ` Tetsuya Mukawa
@ 2016-02-01 13:15         ` Yuanhan Liu
  2016-02-02  2:19           ` Tetsuya Mukawa
  0 siblings, 1 reply; 120+ messages in thread
From: Yuanhan Liu @ 2016-02-01 13:15 UTC (permalink / raw)
  To: Tetsuya Mukawa; +Cc: dev

On Mon, Feb 01, 2016 at 10:50:00AM +0900, Tetsuya Mukawa wrote:
> On 2016/01/29 18:17, Yuanhan Liu wrote:
> > On Thu, Jan 28, 2016 at 06:33:32PM +0900, Tetsuya Mukawa wrote:
> >> This patch addss function pointers to abstract pci access method.
> >> This abstraction layer will be used when virtio-net PMD supports
> >> container extension.
> >>
> >> The below functions abstract how to access to pci configuration space.
> >>
> >> struct virtio_pci_cfg_ops {
> >>         int   (*map)(...);
> >>         void  (*unmap)(...);
> >>         void *(*get_mapped_addr)(...);
> >>         int   (*read)(...);
> >> };
> >>
> >> The pci configuration space has information how to access to virtio
> >> device registers. Basically, there are 2 ways to acccess to the
> >> registers. One is using portio and the other is using mapped memory.
> >> The below functions abstract this access method.
> > One question: is there a way to map PCI memory with Qtest? I'm thinking
> > if we can keep the io_read/write() for Qtest as well, if so, code could
> > be simplified, a lot, IMO.
> >
> 
> Yes, I agree with you.
> But AFAIK, we don't have a way to mmap it from DPDK application.
> 
> We may be able to map PCI configuration space to a memory address space
> that guest CPU can handle.
> But even in this case, I guess we cannot access the memory without qtest
> messaging.

Acutally, I have a concern about this access abstraction, which makes
those simple funciton not inline. It won't be an issue for most of them,
as most of them are invoked during init stage, where has no impact on
performance.

notify_queue(), however, is a bit different. I was thinking the "inline
to callback (not inline)" convertion might has some impacts on the
performance. Would you do a test for me?

Another off-topic remind is that I guess you might need to send a new
version of your vhost-pmd patchset, the sooner the better. Chinese new
year is coming; I'm having vacation since the end of this week (And,
Huawei has been on vacation sine the end of last week). I hope we could
make it in v2.3.

	--yliu

^ permalink raw reply	[flat|nested] 120+ messages in thread

* Re: [PATCH v2 3/3] virtio: Add a new layer to abstract pci access method
  2016-02-01 13:15         ` Yuanhan Liu
@ 2016-02-02  2:19           ` Tetsuya Mukawa
  2016-02-02  2:45             ` Yuanhan Liu
  0 siblings, 1 reply; 120+ messages in thread
From: Tetsuya Mukawa @ 2016-02-02  2:19 UTC (permalink / raw)
  To: Yuanhan Liu; +Cc: dev

On 2016/02/01 22:15, Yuanhan Liu wrote:
> On Mon, Feb 01, 2016 at 10:50:00AM +0900, Tetsuya Mukawa wrote:
>> On 2016/01/29 18:17, Yuanhan Liu wrote:
>>> On Thu, Jan 28, 2016 at 06:33:32PM +0900, Tetsuya Mukawa wrote:
>>>> This patch addss function pointers to abstract pci access method.
>>>> This abstraction layer will be used when virtio-net PMD supports
>>>> container extension.
>>>>
>>>> The below functions abstract how to access to pci configuration space.
>>>>
>>>> struct virtio_pci_cfg_ops {
>>>>         int   (*map)(...);
>>>>         void  (*unmap)(...);
>>>>         void *(*get_mapped_addr)(...);
>>>>         int   (*read)(...);
>>>> };
>>>>
>>>> The pci configuration space has information how to access to virtio
>>>> device registers. Basically, there are 2 ways to acccess to the
>>>> registers. One is using portio and the other is using mapped memory.
>>>> The below functions abstract this access method.
>>> One question: is there a way to map PCI memory with Qtest? I'm thinking
>>> if we can keep the io_read/write() for Qtest as well, if so, code could
>>> be simplified, a lot, IMO.
>>>
>> Yes, I agree with you.
>> But AFAIK, we don't have a way to mmap it from DPDK application.
>>
>> We may be able to map PCI configuration space to a memory address space
>> that guest CPU can handle.
>> But even in this case, I guess we cannot access the memory without qtest
>> messaging.
> Acutally, I have a concern about this access abstraction, which makes
> those simple funciton not inline. It won't be an issue for most of them,
> as most of them are invoked during init stage, where has no impact on
> performance.
>
> notify_queue(), however, is a bit different. I was thinking the "inline
> to callback (not inline)" convertion might has some impacts on the
> performance. Would you do a test for me?

Sure, I will be able to.
But if we concern about it, I guess it's also nice to implement the PMD
on your vtpci abstraction.
(It means we don't use the access abstraction)
Probably this lets our merging process faster.
What do you think?
Also I guess Jianfeng will implement his PMD on your abstraction.
If so, I will also follow him.
 
>
> Another off-topic remind is that I guess you might need to send a new
> version of your vhost-pmd patchset, the sooner the better. Chinese new
> year is coming; I'm having vacation since the end of this week (And,
> Huawei has been on vacation sine the end of last week). I hope we could
> make it in v2.3.

Thanks for notification. Sure I will submit it soon.

Tetsuya

^ permalink raw reply	[flat|nested] 120+ messages in thread

* Re: [PATCH v2 3/3] virtio: Add a new layer to abstract pci access method
  2016-02-02  2:19           ` Tetsuya Mukawa
@ 2016-02-02  2:45             ` Yuanhan Liu
  2016-02-02  3:55               ` Tetsuya Mukawa
  0 siblings, 1 reply; 120+ messages in thread
From: Yuanhan Liu @ 2016-02-02  2:45 UTC (permalink / raw)
  To: Tetsuya Mukawa; +Cc: dev

On Tue, Feb 02, 2016 at 11:19:50AM +0900, Tetsuya Mukawa wrote:
> On 2016/02/01 22:15, Yuanhan Liu wrote:
> > On Mon, Feb 01, 2016 at 10:50:00AM +0900, Tetsuya Mukawa wrote:
> >> On 2016/01/29 18:17, Yuanhan Liu wrote:
> >>> On Thu, Jan 28, 2016 at 06:33:32PM +0900, Tetsuya Mukawa wrote:
> >>>> This patch addss function pointers to abstract pci access method.
> >>>> This abstraction layer will be used when virtio-net PMD supports
> >>>> container extension.
> >>>>
> >>>> The below functions abstract how to access to pci configuration space.
> >>>>
> >>>> struct virtio_pci_cfg_ops {
> >>>>         int   (*map)(...);
> >>>>         void  (*unmap)(...);
> >>>>         void *(*get_mapped_addr)(...);
> >>>>         int   (*read)(...);
> >>>> };
> >>>>
> >>>> The pci configuration space has information how to access to virtio
> >>>> device registers. Basically, there are 2 ways to acccess to the
> >>>> registers. One is using portio and the other is using mapped memory.
> >>>> The below functions abstract this access method.
> >>> One question: is there a way to map PCI memory with Qtest? I'm thinking
> >>> if we can keep the io_read/write() for Qtest as well, if so, code could
> >>> be simplified, a lot, IMO.
> >>>
> >> Yes, I agree with you.
> >> But AFAIK, we don't have a way to mmap it from DPDK application.
> >>
> >> We may be able to map PCI configuration space to a memory address space
> >> that guest CPU can handle.
> >> But even in this case, I guess we cannot access the memory without qtest
> >> messaging.
> > Acutally, I have a concern about this access abstraction, which makes
> > those simple funciton not inline. It won't be an issue for most of them,
> > as most of them are invoked during init stage, where has no impact on
> > performance.
> >
> > notify_queue(), however, is a bit different. I was thinking the "inline
> > to callback (not inline)" convertion might has some impacts on the
> > performance. Would you do a test for me?
> 
> Sure, I will be able to.

Thanks.

> But if we concern about it, I guess it's also nice to implement the PMD
> on your vtpci abstraction.
> (It means we don't use the access abstraction)
> Probably this lets our merging process faster.
> What do you think?

Another standalone PMD driver? (sorry that I didn't follow the
discussion). If so, won't it introduce too much duplicate code?

	--yliu

^ permalink raw reply	[flat|nested] 120+ messages in thread

* Re: [PATCH v2 3/3] virtio: Add a new layer to abstract pci access method
  2016-02-02  2:45             ` Yuanhan Liu
@ 2016-02-02  3:55               ` Tetsuya Mukawa
  0 siblings, 0 replies; 120+ messages in thread
From: Tetsuya Mukawa @ 2016-02-02  3:55 UTC (permalink / raw)
  To: Yuanhan Liu; +Cc: dev

On 2016/02/02 11:45, Yuanhan Liu wrote:
> On Tue, Feb 02, 2016 at 11:19:50AM +0900, Tetsuya Mukawa wrote:
>> On 2016/02/01 22:15, Yuanhan Liu wrote:
>>> On Mon, Feb 01, 2016 at 10:50:00AM +0900, Tetsuya Mukawa wrote:
>>>> On 2016/01/29 18:17, Yuanhan Liu wrote:
>>>>> On Thu, Jan 28, 2016 at 06:33:32PM +0900, Tetsuya Mukawa wrote:
>>>>>> This patch addss function pointers to abstract pci access method.
>>>>>> This abstraction layer will be used when virtio-net PMD supports
>>>>>> container extension.
>>>>>>
>>>>>> The below functions abstract how to access to pci configuration space.
>>>>>>
>>>>>> struct virtio_pci_cfg_ops {
>>>>>>         int   (*map)(...);
>>>>>>         void  (*unmap)(...);
>>>>>>         void *(*get_mapped_addr)(...);
>>>>>>         int   (*read)(...);
>>>>>> };
>>>>>>
>>>>>> The pci configuration space has information how to access to virtio
>>>>>> device registers. Basically, there are 2 ways to acccess to the
>>>>>> registers. One is using portio and the other is using mapped memory.
>>>>>> The below functions abstract this access method.
>>>>> One question: is there a way to map PCI memory with Qtest? I'm thinking
>>>>> if we can keep the io_read/write() for Qtest as well, if so, code could
>>>>> be simplified, a lot, IMO.
>>>>>
>>>> Yes, I agree with you.
>>>> But AFAIK, we don't have a way to mmap it from DPDK application.
>>>>
>>>> We may be able to map PCI configuration space to a memory address space
>>>> that guest CPU can handle.
>>>> But even in this case, I guess we cannot access the memory without qtest
>>>> messaging.
>>> Acutally, I have a concern about this access abstraction, which makes
>>> those simple funciton not inline. It won't be an issue for most of them,
>>> as most of them are invoked during init stage, where has no impact on
>>> performance.
>>>
>>> notify_queue(), however, is a bit different. I was thinking the "inline
>>> to callback (not inline)" convertion might has some impacts on the
>>> performance. Would you do a test for me?
>> Sure, I will be able to.
> Thanks.
>
>> But if we concern about it, I guess it's also nice to implement the PMD
>> on your vtpci abstraction.
>> (It means we don't use the access abstraction)
>> Probably this lets our merging process faster.
>> What do you think?
> Another standalone PMD driver? (sorry that I didn't follow the
> discussion).

Yes, Jianfeng will submit one more virtual virtio-net PMD.

>  If so, won't it introduce too much duplicate code?

Quick look, I guess we won't have not so much duplicated code.

Tetsuya

^ permalink raw reply	[flat|nested] 120+ messages in thread

* [PATCH v2 0/5] Virtio-net PMD: QEMU QTest extension for container
  2016-01-21 11:07   ` [RFC PATCH 5/5] virtio: Extend virtio-net PMD to support container environment Tetsuya Mukawa
                       ` (6 preceding siblings ...)
  2016-01-29  8:57     ` Yuanhan Liu
@ 2016-02-10  3:40     ` Tetsuya Mukawa
  2016-02-10  3:40     ` [PATCH v2 1/5] virtio: Retrieve driver name from eth_dev Tetsuya Mukawa
                       ` (4 subsequent siblings)
  12 siblings, 0 replies; 120+ messages in thread
From: Tetsuya Mukawa @ 2016-02-10  3:40 UTC (permalink / raw)
  To: dev

The patches will work on below patch series.
 - [PATCH v2 0/5] virtio support for container
 - [PATCH 0/4] rework ioport access for virtio

[Changes]
v2 changes:
 - Rebase on above patch seiries.
 - Rebase on master
 - Add "--qtest-virtio" EAL option.
 - Fixes in qtest.c
  - Fix error handling for the case qtest connection is closed.
  - Use eventfd for interrupt messaging.
  - Use linux header for PCI register definitions.
  - Fix qtest_raw_send/recv to handle error correctly.
  - Fix bit mask of PCI_CONFIG_ADDR.
  - Describe memory and ioport usage of qtest guest in qtest.c
  - Remove loop that is for finding PCI devices.


[Abstraction]

Normally, virtio-net PMD only works on VM, because there is no virtio-net device on host.
This patches extend  virtio-net PMD to be able to work on host as virtual PMD.
But we didn't implement virtio-net device as a part of virtio-net PMD.
To prepare virtio-net device for the PMD, start QEMU process with special QTest mode, then connect it from virtio-net PMD through unix domain socket.

The PMD can connect to anywhere QEMU virtio-net device can.
For example, the PMD can connects to vhost-net kernel module and vhost-user backend application.
Similar to virtio-net PMD on QEMU, application memory that uses virtio-net PMD will be shared between vhost backend application.
But vhost backend application memory will not be shared.

Main target of this PMD is container like docker, rkt, lxc and etc.
We can isolate related processes(virtio-net PMD process, QEMU and vhost-user backend process) by container.
But, to communicate through unix domain socket, shared directory will be needed.


[How to use]

 Please use QEMU-2.5.1, or above.
 (So far, QEMU-2.5.1 hasn't been released yet, so please checkout master from QEMU repository)

 - Compile
 Set "CONFIG_RTE_VIRTIO_VDEV_QTEST=y" in config/common_linux.
 Then compile it.

 - Start QEMU like below.
 $ qemu-system-x86_64 \
              -machine pc-i440fx-1.4,accel=qtest \
              -display none -qtest-log /dev/null \
              -qtest unix:/tmp/socket,server \
              -netdev type=tap,script=/etc/qemu-ifup,id=net0,queues=1 \
              -device virtio-net-pci,netdev=net0,mq=on,disable-modern=false,addr=3 \
              -chardev socket,id=chr1,path=/tmp/ivshmem,server \
              -device ivshmem,size=1G,chardev=chr1,vectors=1,addr=4

 - Start DPDK application like below
 $ testpmd -c f -n 1 -m 1024 --no-pci --single-file --qtest-virtio \
             --vdev="eth_qtest_virtio0,qtest=/tmp/socket,ivshmem=/tmp/ivshmem"\
             -- --disable-hw-vlan --txqflags=0xf00 -i

(*1) Please Specify same memory size in QEMU and DPDK command line.
(*2) Should use qemu-2.5.1, or above.
(*3) QEMU process is needed per port.
(*4) virtio-1.0 device are only supported.
(*5) The vhost backends like vhost-net and vhost-user can be specified.
(*6) In most cases, just using above command is enough, but you can also
     specify other QEMU virtio-net options.
(*7) Only checked "pc-i440fx-1.4" machine, but may work with other
     machines. It depends on a machine has piix3 south bridge.
     If the machine doesn't have, virtio-net PMD cannot receive status
     changed interrupts.
(*8) Should not add "--enable-kvm" to QEMU command line.


[Detailed Description]

 - virtio-net device implementation
The PMD uses QEMU virtio-net device. To do that, QEMU QTest functionality is used.
QTest is a test framework of QEMU devices. It allows us to implement a device driver outside of QEMU.
With QTest, we can implement DPDK application and virtio-net PMD as standalone process on host.
When QEMU is invoked as QTest mode, any guest code will not run.
To know more about QTest, see below.
http://wiki.qemu.org/Features/QTest

 - probing devices
QTest provides a unix domain socket. Through this socket, driver process can access to I/O port and memory of QEMU virtual machine.
The PMD will send I/O port accesses to probe pci devices.
If we can find virtio-net and ivshmem device, initialize the devices.
Also, I/O port accesses of virtio-net PMD will be sent through socket, and virtio-net PMD can initialize vitio-net device on QEMU correctly.

 - ivshmem device to share memory
To share memory that virtio-net PMD process uses, ivshmem device will be used.
Because ivshmem device can only handle one file descriptor, shared memory should be consist of one file.
To allocate such a memory, EAL has new option called "--single-file".
Also, the hugepages should be mapped between "1 << 31" to "1 << 44".
To map like above, EAL has one more new option called "-qtest-virtio".
While initializing ivshmem device, we can set BAR(Base Address Register).
It represents which memory QEMU vcpu can access to this shared memory.
We will specify host virtual address of shared memory as this address.
It is very useful because we don't need to apply patch to QEMU to calculate address offset.
(For example, if virtio-net PMD process will allocate memory from shared memory, then specify the virtual address of it to virtio-net register, QEMU virtio-net device can understand it without calculating address offset.)

Tetsuya Mukawa (5):
  virtio: Retrieve driver name from eth_dev
  EAL: Add new EAL "--qtest-virtio" option
  vhost: Add a function to check virtio device type
  virtio: Add support for qtest virtio-net PMD
  docs: add release note for qtest virtio container support

 config/common_linuxapp                     |    1 +
 doc/guides/rel_notes/release_2_3.rst       |    3 +
 drivers/net/virtio/Makefile                |    4 +
 drivers/net/virtio/qtest.c                 | 1342 ++++++++++++++++++++++++++++
 drivers/net/virtio/qtest.h                 |   65 ++
 drivers/net/virtio/virtio_ethdev.c         |  433 ++++++++-
 drivers/net/virtio/virtio_ethdev.h         |   32 +
 drivers/net/virtio/virtio_pci.c            |  364 +++++++-
 drivers/net/virtio/virtio_pci.h            |    5 +-
 lib/librte_eal/common/eal_common_options.c |   10 +
 lib/librte_eal/common/eal_internal_cfg.h   |    1 +
 lib/librte_eal/common/eal_options.h        |    2 +
 lib/librte_eal/linuxapp/eal/eal_memory.c   |   81 +-
 13 files changed, 2274 insertions(+), 69 deletions(-)
 create mode 100644 drivers/net/virtio/qtest.c
 create mode 100644 drivers/net/virtio/qtest.h

-- 
2.1.4

^ permalink raw reply	[flat|nested] 120+ messages in thread

* [PATCH v2 1/5] virtio: Retrieve driver name from eth_dev
  2016-01-21 11:07   ` [RFC PATCH 5/5] virtio: Extend virtio-net PMD to support container environment Tetsuya Mukawa
                       ` (7 preceding siblings ...)
  2016-02-10  3:40     ` [PATCH v2 0/5] Virtio-net PMD: QEMU QTest extension for container Tetsuya Mukawa
@ 2016-02-10  3:40     ` Tetsuya Mukawa
  2016-02-10  3:40     ` [PATCH v2 2/5] EAL: Add new EAL "--qtest-virtio" option Tetsuya Mukawa
                       ` (3 subsequent siblings)
  12 siblings, 0 replies; 120+ messages in thread
From: Tetsuya Mukawa @ 2016-02-10  3:40 UTC (permalink / raw)
  To: dev

Currently, virtio_dev_info_get() retrieves driver name from pci_drv.
If the driver is virtual PMD, pci_drv will be invalid.
So retrieves the name from eth_dev.

Signed-off-by: Tetsuya Mukawa <mukawa@igel.co.jp>
---
 drivers/net/virtio/virtio_ethdev.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c
index b790fd0..1c8c955 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -1438,7 +1438,7 @@ virtio_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
 {
 	struct virtio_hw *hw = dev->data->dev_private;
 
-	dev_info->driver_name = dev->driver->pci_drv.name;
+	dev_info->driver_name = dev->data->drv_name;
 	dev_info->max_rx_queues = (uint16_t)hw->max_rx_queues;
 	dev_info->max_tx_queues = (uint16_t)hw->max_tx_queues;
 	dev_info->min_rx_bufsize = VIRTIO_MIN_RX_BUFSIZE;
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 120+ messages in thread

* [PATCH v2 2/5] EAL: Add new EAL "--qtest-virtio" option
  2016-01-21 11:07   ` [RFC PATCH 5/5] virtio: Extend virtio-net PMD to support container environment Tetsuya Mukawa
                       ` (8 preceding siblings ...)
  2016-02-10  3:40     ` [PATCH v2 1/5] virtio: Retrieve driver name from eth_dev Tetsuya Mukawa
@ 2016-02-10  3:40     ` Tetsuya Mukawa
  2016-02-15  7:52       ` Tan, Jianfeng
                         ` (8 more replies)
  2016-02-10  3:40     ` [PATCH v2 3/5] vhost: Add a function to check virtio device type Tetsuya Mukawa
                       ` (2 subsequent siblings)
  12 siblings, 9 replies; 120+ messages in thread
From: Tetsuya Mukawa @ 2016-02-10  3:40 UTC (permalink / raw)
  To: dev

To work with qtest virtio-net PMD, virtual address that maps hugepages
should be between (1 << 31) to (1 << 44). This patch adds one more option
to map like this. Also all hugepages should consists of one file.
Because of this, the option will work only when '--single-file' option is
specified.

Signed-off-by: Tetsuya Mukawa <mukawa@igel.co.jp>
---
 lib/librte_eal/common/eal_common_options.c | 10 ++++
 lib/librte_eal/common/eal_internal_cfg.h   |  1 +
 lib/librte_eal/common/eal_options.h        |  2 +
 lib/librte_eal/linuxapp/eal/eal_memory.c   | 81 +++++++++++++++++++++++++++++-
 4 files changed, 93 insertions(+), 1 deletion(-)

diff --git a/lib/librte_eal/common/eal_common_options.c b/lib/librte_eal/common/eal_common_options.c
index 65bccbd..34c8bd1 100644
--- a/lib/librte_eal/common/eal_common_options.c
+++ b/lib/librte_eal/common/eal_common_options.c
@@ -96,6 +96,7 @@ eal_long_options[] = {
 	{OPT_VMWARE_TSC_MAP,    0, NULL, OPT_VMWARE_TSC_MAP_NUM   },
 	{OPT_XEN_DOM0,          0, NULL, OPT_XEN_DOM0_NUM         },
 	{OPT_SINGLE_FILE,       0, NULL, OPT_SINGLE_FILE_NUM      },
+	{OPT_QTEST_VIRTIO,      0, NULL, OPT_QTEST_VIRTIO_NUM     },
 	{0,                     0, NULL, 0                        }
 };
 
@@ -902,6 +903,10 @@ eal_parse_common_option(int opt, const char *optarg,
 		conf->single_file = 1;
 		break;
 
+	case OPT_QTEST_VIRTIO_NUM:
+		conf->qtest_virtio = 1;
+		break;
+
 	/* don't know what to do, leave this to caller */
 	default:
 		return 1;
@@ -971,6 +976,11 @@ eal_check_common_options(struct internal_config *internal_cfg)
 			"be specified together with --"OPT_SINGLE_FILE"\n");
 		return -1;
 	}
+	if (internal_cfg->qtest_virtio && !internal_cfg->single_file) {
+		RTE_LOG(ERR, EAL, "Option --"OPT_QTEST_VIRTIO" cannot "
+			"be specified without --"OPT_SINGLE_FILE"\n");
+		return -1;
+	}
 
 	if (internal_cfg->no_hugetlbfs && internal_cfg->hugepage_unlink) {
 		RTE_LOG(ERR, EAL, "Option --"OPT_HUGE_UNLINK" cannot "
diff --git a/lib/librte_eal/common/eal_internal_cfg.h b/lib/librte_eal/common/eal_internal_cfg.h
index 9117ed9..7f3df39 100644
--- a/lib/librte_eal/common/eal_internal_cfg.h
+++ b/lib/librte_eal/common/eal_internal_cfg.h
@@ -71,6 +71,7 @@ struct internal_config {
 	volatile unsigned no_hpet;        /**< true to disable HPET */
 	volatile unsigned vmware_tsc_map; /**< true to use VMware TSC mapping
 										* instead of native TSC */
+	volatile unsigned qtest_virtio;    /**< mmap hugepages to fit qtest virtio PMD */
 	volatile unsigned no_shconf;      /**< true if there is no shared config */
 	volatile unsigned create_uio_dev; /**< true to create /dev/uioX devices */
 	volatile enum rte_proc_type_t process_type; /**< multi-process proc type */
diff --git a/lib/librte_eal/common/eal_options.h b/lib/librte_eal/common/eal_options.h
index e5da14a..b33a3c3 100644
--- a/lib/librte_eal/common/eal_options.h
+++ b/lib/librte_eal/common/eal_options.h
@@ -85,6 +85,8 @@ enum {
 	OPT_XEN_DOM0_NUM,
 #define OPT_SINGLE_FILE       "single-file"
 	OPT_SINGLE_FILE_NUM,
+#define OPT_QTEST_VIRTIO      "qtest-virtio"
+	OPT_QTEST_VIRTIO_NUM,
 	OPT_LONG_MAX_NUM
 };
 
diff --git a/lib/librte_eal/linuxapp/eal/eal_memory.c b/lib/librte_eal/linuxapp/eal/eal_memory.c
index a6b3616..677d6a7 100644
--- a/lib/librte_eal/linuxapp/eal/eal_memory.c
+++ b/lib/librte_eal/linuxapp/eal/eal_memory.c
@@ -1092,6 +1092,73 @@ calc_num_pages_per_socket(uint64_t * memory,
 }
 
 /*
+ * Find memory space that fits qtest virtio-net PMD.
+ */
+static void *
+rte_eal_get_free_region(uint64_t alloc_size, uint64_t pagesz)
+{
+	uint64_t start, end, next_start;
+	uint64_t high_limit, low_limit;
+	char buf[1024], *p;
+	FILE *fp;
+	void *addr = NULL;
+
+	/* all hugepages should be mapped between below values */
+	low_limit = 1UL << 31;
+	high_limit = 1UL << 44;
+
+	/* allocation size should be aligned by page size */
+	if (alloc_size != RTE_ALIGN_CEIL(alloc_size, pagesz)) {
+		rte_panic("Invalid allocation size 0x%lx\n", alloc_size);
+		return NULL;
+	}
+
+	/*
+	 * address should be aligned by allocation size because
+	 * BAR register requiers such an address
+	 */
+	low_limit = RTE_ALIGN_CEIL(low_limit, alloc_size);
+	high_limit = RTE_ALIGN_FLOOR(high_limit, alloc_size);
+
+	fp = fopen("/proc/self/maps", "r");
+	if (fp == NULL) {
+		rte_panic("Cannot open /proc/self/maps\n");
+		return NULL;
+	}
+
+	next_start = 0;
+	do {
+		start = next_start;
+
+		if ((p = fgets(buf, sizeof(buf), fp)) != NULL) {
+			if (sscanf(p, "%lx-%lx ", &end, &next_start) < 2)
+				break;
+
+			next_start = RTE_ALIGN_CEIL(next_start, alloc_size);
+			end = RTE_ALIGN_CEIL(end, alloc_size) - 1;
+		} else
+			end = UINT64_MAX;
+
+		if (start >= high_limit)
+			break;
+		if (end < low_limit)
+			continue;
+
+		start = RTE_MAX(start, low_limit);
+		end = RTE_MIN(end, high_limit - 1);
+
+		if (end - start >= alloc_size - 1) {
+			addr = (void *)start;
+			break;
+		}
+	} while (end != UINT64_MAX);
+
+	fclose(fp);
+
+	return addr;
+}
+
+/*
  * Prepare physical memory mapping: fill configuration structure with
  * these infos, return 0 on success.
  *  1. map N huge pages in separate files in hugetlbfs
@@ -1132,6 +1199,7 @@ rte_eal_hugepage_init(void)
 		uint64_t pagesize;
 		unsigned socket_id = rte_socket_id();
 		char filepath[MAX_HUGEPAGE_PATH];
+		void *fixed;
 
 		if (internal_config.no_hugetlbfs) {
 			eal_get_hugefile_path(filepath, sizeof(filepath),
@@ -1158,7 +1226,18 @@ rte_eal_hugepage_init(void)
 			return -1;
 		}
 
-		addr = mmap(NULL, internal_config.memory,
+		if (internal_config.qtest_virtio) {
+			fixed = rte_eal_get_free_region(
+					internal_config.memory, pagesize);
+			if (fixed == NULL) {
+				RTE_LOG(ERR, EAL, "no free space to mmap %s\n",
+						filepath);
+				return -1;
+			}
+		} else
+			fixed = NULL;
+
+		addr = mmap(fixed, internal_config.memory,
 			    PROT_READ | PROT_WRITE,
 			    MAP_SHARED | MAP_POPULATE, fd, 0);
 		if (addr == MAP_FAILED) {
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 120+ messages in thread

* [PATCH v2 3/5] vhost: Add a function to check virtio device type
  2016-01-21 11:07   ` [RFC PATCH 5/5] virtio: Extend virtio-net PMD to support container environment Tetsuya Mukawa
                       ` (9 preceding siblings ...)
  2016-02-10  3:40     ` [PATCH v2 2/5] EAL: Add new EAL "--qtest-virtio" option Tetsuya Mukawa
@ 2016-02-10  3:40     ` Tetsuya Mukawa
  2016-02-10  3:40     ` [PATCH v2 4/5] virtio: Add support for qtest virtio-net PMD Tetsuya Mukawa
  2016-02-10  3:40     ` [PATCH v2 5/5] docs: add release note for qtest virtio container support Tetsuya Mukawa
  12 siblings, 0 replies; 120+ messages in thread
From: Tetsuya Mukawa @ 2016-02-10  3:40 UTC (permalink / raw)
  To: dev

The patch adds below function to cleanup virtio code.
 - virtio_dev_check()

Signed-off-by: Tetsuya Mukawa <mukawa@igel.co.jp>
---
 drivers/net/virtio/virtio_ethdev.c | 52 ++++++++++++++++++--------------------
 drivers/net/virtio/virtio_ethdev.h | 32 +++++++++++++++++++++++
 2 files changed, 57 insertions(+), 27 deletions(-)

diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c
index 1c8c955..c3e877a 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -371,7 +371,7 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev,
 	vq->mz = mz;
 	vq->vq_ring_virt_mem = mz->addr;
 
-	if (dev->dev_type == RTE_ETH_DEV_PCI) {
+	if (virtio_dev_check(dev, RTE_ETH_DEV_PCI, NULL, 0)) {
 		vq->vq_ring_mem = mz->phys_addr;
 
 		/* Virtio PCI device VIRTIO_PCI_QUEUE_PF register is 32bit,
@@ -429,7 +429,7 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev,
 		vq->virtio_net_hdr_vaddr = mz->addr;
 		memset(vq->virtio_net_hdr_vaddr, 0, hdr_size);
 
-		if (dev->dev_type == RTE_ETH_DEV_PCI)
+		if (virtio_dev_check(dev, RTE_ETH_DEV_PCI, NULL, 0))
 			vq->virtio_net_hdr_mem = mz->phys_addr;
 #ifdef RTE_VIRTIO_VDEV
 		else
@@ -439,7 +439,7 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev,
 
 	hw->vtpci_ops->setup_queue(hw, vq);
 
-	if (dev->dev_type == RTE_ETH_DEV_PCI)
+	if (virtio_dev_check(dev, RTE_ETH_DEV_PCI, NULL, 0))
 		vq->offset = offsetof(struct rte_mbuf, buf_physaddr);
 #ifdef RTE_VIRTIO_VDEV
 	else
@@ -490,15 +490,13 @@ static void
 virtio_dev_close(struct rte_eth_dev *dev)
 {
 	struct virtio_hw *hw = dev->data->dev_private;
-	struct rte_pci_device *pci_dev = dev->pci_dev;
 
 	PMD_INIT_LOG(DEBUG, "virtio_dev_close");
 
 	/* reset the NIC */
-	if (dev->dev_type == RTE_ETH_DEV_PCI) {
-		if (pci_dev->driver->drv_flags & RTE_PCI_DRV_INTR_LSC)
-			vtpci_irq_config(hw, VIRTIO_MSI_NO_VECTOR);
-	}
+	if (virtio_dev_check(dev, RTE_ETH_DEV_PCI, NULL, RTE_PCI_DRV_INTR_LSC))
+		vtpci_irq_config(hw, VIRTIO_MSI_NO_VECTOR);
+
 	vtpci_reset(hw);
 	hw->started = 0;
 	virtio_dev_free_mbufs(dev);
@@ -1001,7 +999,7 @@ virtio_interrupt_handler(__rte_unused struct rte_intr_handle *handle,
 	isr = vtpci_isr(hw);
 	PMD_DRV_LOG(INFO, "interrupt status = %#x", isr);
 
-	if (dev->dev_type == RTE_ETH_DEV_PCI)
+	if (virtio_dev_check(dev, RTE_ETH_DEV_PCI, NULL, 0))
 		if (rte_intr_enable(&dev->pci_dev->intr_handle) < 0)
 			PMD_DRV_LOG(ERR, "interrupt enable failed");
 
@@ -1056,9 +1054,10 @@ eth_virtio_dev_init(struct rte_eth_dev *eth_dev)
 
 	pci_dev = eth_dev->pci_dev;
 
-	if (eth_dev->dev_type == RTE_ETH_DEV_PCI)
+	if (virtio_dev_check(eth_dev, RTE_ETH_DEV_PCI, NULL, 0)) {
 		if (vtpci_init(pci_dev, hw) < 0)
 			return -1;
+	}
 
 	/* Reset the device although not necessary at startup */
 	vtpci_reset(hw);
@@ -1072,7 +1071,7 @@ eth_virtio_dev_init(struct rte_eth_dev *eth_dev)
 		return -1;
 
 	/* If host does not support status then disable LSC */
-	if (eth_dev->dev_type == RTE_ETH_DEV_PCI) {
+	if (virtio_dev_check(eth_dev, RTE_ETH_DEV_PCI, NULL, 0)) {
 		if (!vtpci_with_feature(hw, VIRTIO_NET_F_STATUS))
 			pci_dev->driver->drv_flags &= ~RTE_PCI_DRV_INTR_LSC;
 
@@ -1154,13 +1153,14 @@ eth_virtio_dev_init(struct rte_eth_dev *eth_dev)
 
 	PMD_INIT_LOG(DEBUG, "hw->max_rx_queues=%d   hw->max_tx_queues=%d",
 			hw->max_rx_queues, hw->max_tx_queues);
-	if (eth_dev->dev_type == RTE_ETH_DEV_PCI) {
+	if (virtio_dev_check(eth_dev, RTE_ETH_DEV_PCI, NULL, 0)) {
 		PMD_INIT_LOG(DEBUG, "port %d vendorID=0x%x deviceID=0x%x",
 			     eth_dev->data->port_id, pci_dev->id.vendor_id,
 			     pci_dev->id.device_id);
 
 		/* Setup interrupt callback  */
-		if (pci_dev->driver->drv_flags & RTE_PCI_DRV_INTR_LSC)
+		if (virtio_dev_check(eth_dev, RTE_ETH_DEV_PCI,
+					NULL, RTE_PCI_DRV_INTR_LSC))
 			rte_intr_callback_register(&pci_dev->intr_handle,
 						   virtio_interrupt_handler,
 						   eth_dev);
@@ -1197,11 +1197,11 @@ eth_virtio_dev_uninit(struct rte_eth_dev *eth_dev)
 	eth_dev->data->mac_addrs = NULL;
 
 	/* reset interrupt callback  */
-	if (eth_dev->dev_type == RTE_ETH_DEV_PCI)
-		if (pci_dev->driver->drv_flags & RTE_PCI_DRV_INTR_LSC)
-			rte_intr_callback_unregister(&pci_dev->intr_handle,
-						     virtio_interrupt_handler,
-						     eth_dev);
+	if (virtio_dev_check(eth_dev, RTE_ETH_DEV_PCI, NULL, RTE_PCI_DRV_INTR_LSC))
+		rte_intr_callback_unregister(&pci_dev->intr_handle,
+					     virtio_interrupt_handler,
+					     eth_dev);
+
 	rte_eal_pci_unmap_device(pci_dev);
 
 	PMD_INIT_LOG(DEBUG, "dev_uninit completed");
@@ -1248,7 +1248,6 @@ virtio_dev_configure(struct rte_eth_dev *dev)
 {
 	const struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode;
 	struct virtio_hw *hw = dev->data->dev_private;
-	struct rte_pci_device *pci_dev = dev->pci_dev;
 
 	PMD_INIT_LOG(DEBUG, "configure");
 
@@ -1266,12 +1265,11 @@ virtio_dev_configure(struct rte_eth_dev *dev)
 		return -ENOTSUP;
 	}
 
-	if (dev->dev_type == RTE_ETH_DEV_PCI) {
-		if (pci_dev->driver->drv_flags & RTE_PCI_DRV_INTR_LSC)
-			if (vtpci_irq_config(hw, 0) == VIRTIO_MSI_NO_VECTOR) {
-				PMD_DRV_LOG(ERR, "failed to set config vector");
-				return -EBUSY;
-			}
+	if (virtio_dev_check(dev, RTE_ETH_DEV_PCI, NULL, RTE_PCI_DRV_INTR_LSC)) {
+		if (vtpci_irq_config(hw, 0) == VIRTIO_MSI_NO_VECTOR) {
+			PMD_DRV_LOG(ERR, "failed to set config vector");
+			return -EBUSY;
+		}
 	}
 
 	return 0;
@@ -1283,11 +1281,11 @@ virtio_dev_start(struct rte_eth_dev *dev)
 {
 	uint16_t nb_queues, i;
 	struct virtio_hw *hw = dev->data->dev_private;
-	struct rte_pci_device *pci_dev = dev->pci_dev;
 
 	/* check if lsc interrupt feature is enabled */
 	if (dev->data->dev_conf.intr_conf.lsc) {
-		if (!(pci_dev->driver->drv_flags & RTE_PCI_DRV_INTR_LSC)) {
+		if (!virtio_dev_check(dev, RTE_ETH_DEV_PCI,
+					NULL, RTE_PCI_DRV_INTR_LSC)) {
 			PMD_DRV_LOG(ERR, "link status not supported by host");
 			return -ENOTSUP;
 		}
diff --git a/drivers/net/virtio/virtio_ethdev.h b/drivers/net/virtio/virtio_ethdev.h
index fde77ca..223b56d 100644
--- a/drivers/net/virtio/virtio_ethdev.h
+++ b/drivers/net/virtio/virtio_ethdev.h
@@ -67,6 +67,38 @@
 	 1u << VIRTIO_NET_F_MRG_RXBUF	  |	\
 	 1ULL << VIRTIO_F_VERSION_1)
 
+static inline int
+virtio_dev_check(struct rte_eth_dev *dev, enum rte_eth_dev_type type,
+		const char *name, uint64_t flags)
+{
+	int ret;
+
+	if (dev == NULL)
+		return 0;
+
+	if (type != 0)
+		ret = (dev->dev_type == type);
+	else
+		ret = 1;
+
+	if (name != 0)
+		ret &= (strncmp(dev->data->name, name, strlen(name)) == 0);
+	else
+		ret &= 1;
+
+	if (flags != 0) {
+		if (dev->dev_type == RTE_ETH_DEV_PCI)
+			ret &= (dev->pci_dev->driver->drv_flags & flags) ? 1 : 0;
+		else if (dev->dev_type == RTE_ETH_DEV_VIRTUAL)
+			ret &= (dev->data->dev_flags & flags) ? 1 : 0;
+		else
+			ret = 0;
+	} else
+		ret &= 1;
+
+	return ret;
+}
+
 /*
  * CQ function prototype
  */
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 120+ messages in thread

* [PATCH v2 4/5] virtio: Add support for qtest virtio-net PMD
  2016-01-21 11:07   ` [RFC PATCH 5/5] virtio: Extend virtio-net PMD to support container environment Tetsuya Mukawa
                       ` (10 preceding siblings ...)
  2016-02-10  3:40     ` [PATCH v2 3/5] vhost: Add a function to check virtio device type Tetsuya Mukawa
@ 2016-02-10  3:40     ` Tetsuya Mukawa
  2016-02-10  3:40     ` [PATCH v2 5/5] docs: add release note for qtest virtio container support Tetsuya Mukawa
  12 siblings, 0 replies; 120+ messages in thread
From: Tetsuya Mukawa @ 2016-02-10  3:40 UTC (permalink / raw)
  To: dev

The patch adds a new virtio-net PMD configuration that allows the PMD to
work on host as if the PMD is in VM.
Here is new configuration for virtio-net PMD.
 - CONFIG_RTE_VIRTIO_VDEV_QTEST
To use this mode, EAL needs map all hugepages as one file. Also the file
should be mapped between (1 << 31) and (1 << 44).

To allocate like above, add "--single-file" and "--qtest-virtio" option
to application command line.

To prepare virtio-net device on host, the users need to invoke QEMU
process in special qtest mode. This mode is mainly used for testing QEMU
devices from outer process. In this mode, no guest runs.
Here is QEMU command line.

 $ qemu-system-x86_64 \
     -machine pc-i440fx-1.4,accel=qtest \
     -display none -qtest-log /dev/null \
     -qtest unix:/tmp/socket,server \
     -netdev type=tap,script=/etc/qemu-ifup,id=net0,queues=1 \
     -device virtio-net-pci,netdev=net0,mq=on,disable-modern=false,addr=3 \
     -chardev socket,id=chr1,path=/tmp/ivshmem,server \
     -device ivshmem,size=1G,chardev=chr1,vectors=1,addr=4

 * Should use qemu-2.5.1, or above.
 * QEMU process is needed per port.
 * virtio-1.0 device are only supported.
 * The vhost backends like vhost-net and vhost-user can be specified.
 * In most cases, just using above command is enough, but you can also
   specify other QEMU virtio-net options.
 * Only checked "pc-i440fx-1.4" machine, but may work with other
   machines. It depends on a machine has piix3 south bridge.
   If the machine doesn't have, virtio-net PMD cannot receive status
   changed interrupts.
 * Should not add "--enable-kvm" to QEMU command line.

After invoking QEMU, the PMD can connect to QEMU process using unix
domain sockets. Over these sockets, virtio-net, ivshmem and piix3
device in QEMU are probed by the PMD.
Here is example of command line.

 $ testpmd -c f -n 1 -m 1024 --no-pci --single-file --qtest-virtio \
      --vdev="eth_qtest_virtio0,qtest=/tmp/socket,ivshmem=/tmp/ivshmem"\
      -- --disable-hw-vlan --txqflags=0xf00 -i

Please specify same unix domain sockets and memory size in both QEMU
and DPDK command lines like above.
The share memory size should be power of 2, because ivshmem only
accepts such memory size.

Signed-off-by: Tetsuya Mukawa <mukawa@igel.co.jp>
---
 config/common_linuxapp             |    1 +
 drivers/net/virtio/Makefile        |    4 +
 drivers/net/virtio/qtest.c         | 1342 ++++++++++++++++++++++++++++++++++++
 drivers/net/virtio/qtest.h         |   65 ++
 drivers/net/virtio/virtio_ethdev.c |  383 +++++++++-
 drivers/net/virtio/virtio_pci.c    |  364 +++++++++-
 drivers/net/virtio/virtio_pci.h    |    5 +-
 7 files changed, 2122 insertions(+), 42 deletions(-)
 create mode 100644 drivers/net/virtio/qtest.c
 create mode 100644 drivers/net/virtio/qtest.h

diff --git a/config/common_linuxapp b/config/common_linuxapp
index f76e162..7cbf50d 100644
--- a/config/common_linuxapp
+++ b/config/common_linuxapp
@@ -539,3 +539,4 @@ CONFIG_RTE_TEST_PMD_RECORD_BURST_STATS=n
 # Enable virtio support for container
 #
 CONFIG_RTE_VIRTIO_VDEV=y
+CONFIG_RTE_VIRTIO_VDEV_QTEST=y
diff --git a/drivers/net/virtio/Makefile b/drivers/net/virtio/Makefile
index ef920f9..6c11378 100644
--- a/drivers/net/virtio/Makefile
+++ b/drivers/net/virtio/Makefile
@@ -56,6 +56,10 @@ ifeq ($(CONFIG_RTE_VIRTIO_VDEV),y)
 	SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += vhost_embedded.c
 endif
 
+ifeq ($(CONFIG_RTE_VIRTIO_VDEV_QTEST),y)
+	SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += qtest.c
+endif
+
 # this lib depends upon:
 DEPDIRS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += lib/librte_eal lib/librte_ether
 DEPDIRS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += lib/librte_mempool lib/librte_mbuf
diff --git a/drivers/net/virtio/qtest.c b/drivers/net/virtio/qtest.c
new file mode 100644
index 0000000..418214f
--- /dev/null
+++ b/drivers/net/virtio/qtest.c
@@ -0,0 +1,1342 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2016 IGEL Co., Ltd. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of IGEL Co., Ltd. nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <sys/queue.h>
+#include <signal.h>
+#include <pthread.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <sys/eventfd.h>
+#include <linux/pci_regs.h>
+
+#include <rte_memory.h>
+#include <rte_malloc.h>
+#include <rte_common.h>
+#include <rte_interrupts.h>
+
+#include "virtio_pci.h"
+#include "virtio_logs.h"
+#include "virtio_ethdev.h"
+#include "qtest.h"
+
+#define NB_BAR                          6
+
+/* PIIX3 configuration registers */
+#define PIIX3_REG_ADDR_PIRQA            0x60
+#define PIIX3_REG_ADDR_PIRQB            0x61
+#define PIIX3_REG_ADDR_PIRQC            0x62
+#define PIIX3_REG_ADDR_PIRQD            0x63
+
+/* Device information */
+#define VIRTIO_NET_DEVICE_ID            0x1000
+#define VIRTIO_NET_VENDOR_ID            0x1af4
+#define VIRTIO_NET_IRQ_NUM              10
+#define IVSHMEM_DEVICE_ID               0x1110
+#define IVSHMEM_VENDOR_ID               0x1af4
+#define IVSHMEM_PROTOCOL_VERSION        0
+#define PIIX3_DEVICE_ID                 0x7000
+#define PIIX3_VENDOR_ID                 0x8086
+
+/* ------------------------------------------------------------
+ * IO port mapping of qtest guest
+ * ------------------------------------------------------------
+ * 0x0000 - 0xbfff : not used
+ * 0xc000 - 0xc03f : virtio-net(BAR0)
+ * 0xc040 - 0xffff : not used
+ *
+ * ------------------------------------------------------------
+ * Memory mapping of qtest quest
+ * ------------------------------------------------------------
+ * 0x00000000_00000000 - 0x00000000_3fffffff : not used
+ * 0x00000000_40000000 - 0x00000000_40000fff : virtio-net(BAR1)
+ * 0x00000000_40001000 - 0x00000000_40ffffff : not used
+ * 0x00000000_41000000 - 0x00000000_417fffff : virtio-net(BAR4)
+ * 0x00000000_41800000 - 0x00000000_41ffffff : not used
+ * 0x00000000_42000000 - 0x00000000_420000ff : ivshmem(BAR0)
+ * 0x00000000_42000100 - 0x00000000_42ffffff : not used
+ * 0x00000000_80000000 - 0xffffffff_ffffffff : ivshmem(BAR2)
+ *
+ * We can only specify start address of a region. The region size
+ * will be defined by the device implementation in QEMU.
+ * The size will be pow of 2 accroding to the PCI specification.
+ * Also, the region start address should be aligned by region size.
+ *
+ * BAR2 of ivshmem will be mmapped address of DPDK application memory.
+ * So this address will be dynamically changed, but not to overlap
+ * others, it shoulb be mmaped between above addresses. Such allocation
+ * is done by EAL. Check rte_eal_get_free_region() also.
+ */
+#define VIRTIO_NET_IO_START             0xc000
+#define VIRTIO_NET_MEMORY1_START	0x40000000
+#define VIRTIO_NET_MEMORY2_START	0x41000000
+#define IVSHMEM_MEMORY_START            0x42000000
+
+#define PCI_CONFIG_ADDR(_bus, _device, _function, _offset) ( \
+	(1 << 31) | ((_bus) & 0xff) << 16 | ((_device) & 0x1f) << 11 | \
+	((_function) & 0x7) << 8 | ((_offset) & 0xfc))
+
+static char interrupt_message[32];
+
+enum qtest_pci_bar_type {
+	QTEST_PCI_BAR_DISABLE = 0,
+	QTEST_PCI_BAR_IO,
+	QTEST_PCI_BAR_MEMORY_UNDER_1MB,
+	QTEST_PCI_BAR_MEMORY_32,
+	QTEST_PCI_BAR_MEMORY_64
+};
+
+struct qtest_pci_bar {
+	enum qtest_pci_bar_type type;
+	uint8_t addr;
+	uint64_t region_start;
+	uint64_t region_size;
+};
+
+struct qtest_session;
+TAILQ_HEAD(qtest_pci_device_list, qtest_pci_device);
+struct qtest_pci_device {
+	TAILQ_ENTRY(qtest_pci_device) next;
+	const char *name;
+	uint16_t device_id;
+	uint16_t vendor_id;
+	uint8_t bus_addr;
+	uint8_t device_addr;
+	struct qtest_pci_bar bar[NB_BAR];
+	int (*init)(struct qtest_session *s, struct qtest_pci_device *dev);
+};
+
+union qtest_pipefds {
+	struct {
+		int pipefd[2];
+	};
+	struct {
+		int readfd;
+		int writefd;
+	};
+};
+
+struct qtest_session {
+	int qtest_socket;
+	pthread_mutex_t qtest_session_lock;
+
+	struct qtest_pci_device_list head;
+	int ivshmem_socket;
+
+	pthread_t event_th;
+	char *evq;
+	char *evq_dequeue_ptr;
+	size_t evq_total_len;
+
+	union qtest_pipefds msgfds;
+
+	pthread_t intr_th;
+	int eventfd;
+	rte_atomic16_t enable_intr;
+	rte_intr_callback_fn cb;
+	void *cb_arg;
+	struct rte_eth_dev_data *eth_data;
+};
+
+static int
+qtest_raw_send(int fd, char *buf, size_t count)
+{
+	size_t len = count;
+	size_t total_len = 0;
+	int ret = 0;
+
+	while (len > 0) {
+		ret = write(fd, buf, len);
+		if (ret == -1) {
+			if (errno == EINTR)
+				continue;
+			return ret;
+		}
+		if (ret == (int)len)
+			break;
+		total_len += ret;
+		buf += ret;
+		len -= ret;
+	}
+	return total_len + ret;
+}
+
+static int
+qtest_raw_recv(int fd, char *buf, size_t count)
+{
+	size_t len = count;
+	size_t total_len = 0;
+	int ret = 0;
+
+	while (len > 0) {
+		ret = read(fd, buf, len);
+		if (ret <= 0) {
+			if (errno == EINTR) {
+				continue;
+			}
+			return ret;
+		}
+		if (ret == (int)len)
+			break;
+		if (*(buf + ret - 1) == '\n')
+			break;
+		total_len += ret;
+		buf += ret;
+		len -= ret;
+	}
+	return total_len + ret;
+}
+
+/*
+ * To know QTest protocol specification, see below QEMU source code.
+ *  - qemu/qtest.c
+ * If qtest socket is closed, qtest_raw_in and qtest_raw_read will return 0.
+ */
+static uint32_t
+qtest_raw_in(struct qtest_session *s, uint16_t addr, char type)
+{
+	char buf[64];
+	int ret;
+
+	if ((type != 'l') && (type != 'w') && (type != 'b'))
+		rte_panic("Invalid value\n");
+
+	snprintf(buf, sizeof(buf), "in%c 0x%x\n", type, addr);
+	/* write to qtest socket */
+	ret = qtest_raw_send(s->qtest_socket, buf, strlen(buf));
+	/* read reply from event handler */
+	ret = qtest_raw_recv(s->msgfds.readfd, buf, sizeof(buf));
+	if (ret < 0)
+		return 0;
+
+	buf[ret] = '\0';
+	return strtoul(buf + strlen("OK "), NULL, 16);
+}
+
+static void
+qtest_raw_out(struct qtest_session *s, uint16_t addr, uint32_t val, char type)
+{
+	char buf[64];
+	int ret __rte_unused;
+
+	if ((type != 'l') && (type != 'w') && (type != 'b'))
+		rte_panic("Invalid value\n");
+
+	snprintf(buf, sizeof(buf), "out%c 0x%x 0x%x\n", type, addr, val);
+	/* write to qtest socket */
+	ret = qtest_raw_send(s->qtest_socket, buf, strlen(buf));
+	/* read reply from event handler */
+	ret = qtest_raw_recv(s->msgfds.readfd, buf, sizeof(buf));
+}
+
+static uint32_t
+qtest_raw_read(struct qtest_session *s, uint64_t addr, char type)
+{
+	char buf[64];
+	int ret;
+
+	if ((type != 'l') && (type != 'w') && (type != 'b'))
+		rte_panic("Invalid value\n");
+
+	snprintf(buf, sizeof(buf), "read%c 0x%lx\n", type, addr);
+	/* write to qtest socket */
+	ret = qtest_raw_send(s->qtest_socket, buf, strlen(buf));
+	/* read reply from event handler */
+	ret = qtest_raw_recv(s->msgfds.readfd, buf, sizeof(buf));
+	if (ret < 0)
+		return 0;
+
+	buf[ret] = '\0';
+	return strtoul(buf + strlen("OK "), NULL, 16);
+}
+
+static void
+qtest_raw_write(struct qtest_session *s, uint64_t addr, uint32_t val, char type)
+{
+	char buf[64];
+	int ret __rte_unused;
+
+	if ((type != 'l') && (type != 'w') && (type != 'b'))
+		rte_panic("Invalid value\n");
+
+	snprintf(buf, sizeof(buf), "write%c 0x%lx 0x%x\n", type, addr, val);
+	/* write to qtest socket */
+	ret = qtest_raw_send(s->qtest_socket, buf, strlen(buf));
+	/* read reply from event handler */
+	ret = qtest_raw_recv(s->msgfds.readfd, buf, sizeof(buf));
+}
+
+/*
+ * qtest_pci_inX/outX are used for accessing PCI configuration space.
+ * The functions are implemented based on PCI configuration space
+ * specification.
+ * Accroding to the spec, access size of read()/write() should be 4 bytes.
+ */
+static int
+qtest_pci_inb(struct qtest_session *s, uint8_t bus, uint8_t device,
+		uint8_t function, uint8_t offset)
+{
+	uint32_t tmp;
+
+	tmp = PCI_CONFIG_ADDR(bus, device, function, offset);
+
+	if (pthread_mutex_lock(&s->qtest_session_lock) < 0)
+		rte_panic("Cannot lock mutex\n");
+
+	qtest_raw_out(s, 0xcf8, tmp, 'l');
+	tmp = qtest_raw_in(s, 0xcfc, 'l');
+
+	if (pthread_mutex_unlock(&s->qtest_session_lock) < 0)
+		rte_panic("Cannot unlock mutex\n");
+
+	return (tmp >> ((offset & 0x3) * 8)) & 0xff;
+}
+
+static void
+qtest_pci_outb(struct qtest_session *s, uint8_t bus, uint8_t device,
+		uint8_t function, uint8_t offset, uint8_t value)
+{
+	uint32_t addr, tmp, pos;
+
+	addr = PCI_CONFIG_ADDR(bus, device, function, offset);
+	pos = (offset % 4) * 8;
+
+	if (pthread_mutex_lock(&s->qtest_session_lock) < 0)
+		rte_panic("Cannot lock mutex\n");
+
+	qtest_raw_out(s, 0xcf8, addr, 'l');
+	tmp = qtest_raw_in(s, 0xcfc, 'l');
+	tmp = (tmp & ~(0xff << pos)) | (value << pos);
+
+	qtest_raw_out(s, 0xcf8, addr, 'l');
+	qtest_raw_out(s, 0xcfc, tmp, 'l');
+
+	if (pthread_mutex_unlock(&s->qtest_session_lock) < 0)
+		rte_panic("Cannot unlock mutex\n");
+}
+
+static uint32_t
+qtest_pci_inl(struct qtest_session *s, uint8_t bus, uint8_t device,
+		uint8_t function, uint8_t offset)
+{
+	uint32_t tmp;
+
+	tmp = PCI_CONFIG_ADDR(bus, device, function, offset);
+
+	if (pthread_mutex_lock(&s->qtest_session_lock) < 0)
+		rte_panic("Cannot lock mutex\n");
+
+	qtest_raw_out(s, 0xcf8, tmp, 'l');
+	tmp = qtest_raw_in(s, 0xcfc, 'l');
+
+	if (pthread_mutex_unlock(&s->qtest_session_lock) < 0)
+		rte_panic("Cannot unlock mutex\n");
+
+	return tmp;
+}
+
+static void
+qtest_pci_outl(struct qtest_session *s, uint8_t bus, uint8_t device,
+		uint8_t function, uint8_t offset, uint32_t value)
+{
+	uint32_t tmp;
+
+	tmp = PCI_CONFIG_ADDR(bus, device, function, offset);
+
+	if (pthread_mutex_lock(&s->qtest_session_lock) < 0)
+		rte_panic("Cannot lock mutex\n");
+
+	qtest_raw_out(s, 0xcf8, tmp, 'l');
+	qtest_raw_out(s, 0xcfc, value, 'l');
+
+	if (pthread_mutex_unlock(&s->qtest_session_lock) < 0)
+		rte_panic("Cannot unlock mutex\n");
+}
+
+static uint64_t
+qtest_pci_inq(struct qtest_session *s, uint8_t bus, uint8_t device,
+		uint8_t function, uint8_t offset)
+{
+	uint32_t tmp;
+	uint64_t val;
+
+	tmp = PCI_CONFIG_ADDR(bus, device, function, offset);
+
+	if (pthread_mutex_lock(&s->qtest_session_lock) < 0)
+		rte_panic("Cannot lock mutex\n");
+
+	qtest_raw_out(s, 0xcf8, tmp, 'l');
+	val = (uint64_t)qtest_raw_in(s, 0xcfc, 'l');
+
+	tmp = PCI_CONFIG_ADDR(bus, device, function, offset + 4);
+
+	qtest_raw_out(s, 0xcf8, tmp, 'l');
+	val |= (uint64_t)qtest_raw_in(s, 0xcfc, 'l') << 32;
+
+	if (pthread_mutex_unlock(&s->qtest_session_lock) < 0)
+		rte_panic("Cannot unlock mutex\n");
+
+	return val;
+}
+
+static void
+qtest_pci_outq(struct qtest_session *s, uint8_t bus, uint8_t device,
+		uint8_t function, uint8_t offset, uint64_t value)
+{
+	uint32_t tmp;
+
+	tmp = PCI_CONFIG_ADDR(bus, device, function, offset);
+
+	if (pthread_mutex_lock(&s->qtest_session_lock) < 0)
+		rte_panic("Cannot lock mutex\n");
+
+	qtest_raw_out(s, 0xcf8, tmp, 'l');
+	qtest_raw_out(s, 0xcfc, (uint32_t)(value & 0xffffffff), 'l');
+
+	tmp = PCI_CONFIG_ADDR(bus, device, function, offset + 4);
+
+	qtest_raw_out(s, 0xcf8, tmp, 'l');
+	qtest_raw_out(s, 0xcfc, (uint32_t)(value >> 32), 'l');
+
+	if (pthread_mutex_unlock(&s->qtest_session_lock) < 0)
+		rte_panic("Cannot unlock mutex\n");
+}
+
+/*
+ * qtest_in/out are used for accessing ioport of qemu guest.
+ * qtest_read/write are used for accessing memory of qemu guest.
+ */
+uint32_t
+qtest_in(struct virtio_hw *hw, uint16_t addr, char type)
+{
+	struct qtest_session *s = (struct qtest_session *)hw->qsession;
+	uint32_t val;
+
+	if (pthread_mutex_lock(&s->qtest_session_lock) < 0)
+		rte_panic("Cannot lock mutex\n");
+
+	val = qtest_raw_in(s, addr, type);
+
+	if (pthread_mutex_unlock(&s->qtest_session_lock) < 0)
+		rte_panic("Cannot lock mutex\n");
+
+	return val;
+}
+
+void
+qtest_out(struct virtio_hw *hw, uint16_t addr, uint64_t val, char type)
+{
+	struct qtest_session *s = (struct qtest_session *)hw->qsession;
+
+	if (pthread_mutex_lock(&s->qtest_session_lock) < 0)
+		rte_panic("Cannot lock mutex\n");
+
+	qtest_raw_out(s, addr, val, type);
+
+	if (pthread_mutex_unlock(&s->qtest_session_lock) < 0)
+		rte_panic("Cannot lock mutex\n");
+}
+
+uint32_t
+qtest_read(struct virtio_hw *hw, uint64_t addr, char type)
+{
+	struct qtest_session *s = (struct qtest_session *)hw->qsession;
+	uint32_t val;
+
+	if (pthread_mutex_lock(&s->qtest_session_lock) < 0)
+		rte_panic("Cannot lock mutex\n");
+
+	val = qtest_raw_read(s, addr, type);
+
+	if (pthread_mutex_unlock(&s->qtest_session_lock) < 0)
+		rte_panic("Cannot lock mutex\n");
+
+	return val;
+}
+
+void
+qtest_write(struct virtio_hw *hw, uint64_t addr, uint64_t val, char type)
+{
+	struct qtest_session *s = (struct qtest_session *)hw->qsession;
+
+	if (pthread_mutex_lock(&s->qtest_session_lock) < 0)
+		rte_panic("Cannot lock mutex\n");
+
+	qtest_raw_write(s, addr, val, type);
+
+	if (pthread_mutex_unlock(&s->qtest_session_lock) < 0)
+		rte_panic("Cannot lock mutex\n");
+}
+
+static struct qtest_pci_device *
+qtest_find_device(struct qtest_session *s, const char *name)
+{
+	struct qtest_pci_device *dev;
+
+	TAILQ_FOREACH(dev, &s->head, next) {
+		if (strcmp(dev->name, name) == 0)
+			return dev;
+	}
+	return NULL;
+}
+
+/*
+ * The function is used for reading pci configuration space of specifed device.
+ */
+int
+qtest_read_pci_cfg(struct virtio_hw *hw, const char *name,
+		void *buf, size_t len, off_t offset)
+{
+	struct qtest_session *s = (struct qtest_session *)hw->qsession;
+	struct qtest_pci_device *dev;
+	uint32_t i;
+	uint8_t *p = buf;
+
+	dev = qtest_find_device(s, name);
+	if (dev == NULL) {
+		PMD_DRV_LOG(ERR, "Cannot find specified device: %s\n", name);
+		return -1;
+	}
+
+	for (i = 0; i < len; i++) {
+		*(p + i) = qtest_pci_inb(s,
+				dev->bus_addr, dev->device_addr, 0, offset + i);
+	}
+
+	return 0;
+}
+
+static struct qtest_pci_bar *
+qtest_get_bar(struct virtio_hw *hw, const char *name, uint8_t bar)
+{
+	struct qtest_session *s = (struct qtest_session *)hw->qsession;
+	struct qtest_pci_device *dev;
+
+	if (bar >= NB_BAR) {
+		PMD_DRV_LOG(ERR, "Invalid bar is specified: %u\n", bar);
+		return NULL;
+	}
+
+	dev = qtest_find_device(s, name);
+	if (dev == NULL) {
+		PMD_DRV_LOG(ERR, "Cannot find specified device: %s\n", name);
+		return NULL;
+	}
+
+	if (dev->bar[bar].type == QTEST_PCI_BAR_DISABLE) {
+		PMD_DRV_LOG(ERR, "Cannot find valid BAR(%s): %u\n", name, bar);
+		return NULL;
+	}
+
+	return &dev->bar[bar];
+}
+
+int
+qtest_get_bar_addr(struct virtio_hw *hw, const char *name,
+		uint8_t bar, uint64_t **addr)
+{
+	struct qtest_pci_bar *bar_ptr;
+
+	bar_ptr = qtest_get_bar(hw, name, bar);
+	if (bar_ptr == NULL)
+		return -1;
+
+	*addr = (uint64_t *)bar_ptr->region_start;
+	return 0;
+}
+
+int
+qtest_get_bar_size(struct virtio_hw *hw, const char *name,
+		uint8_t bar, uint64_t *size)
+{
+	struct qtest_pci_bar *bar_ptr;
+
+	bar_ptr = qtest_get_bar(hw, name, bar);
+	if (bar_ptr == NULL)
+		return -1;
+
+	*size = bar_ptr->region_size;
+	return 0;
+}
+
+int
+qtest_intr_enable(void *data)
+{
+	struct virtio_hw *hw = ((struct rte_eth_dev_data *)data)->dev_private;
+	struct qtest_session *s;
+
+	s = (struct qtest_session *)hw->qsession;
+	rte_atomic16_set(&s->enable_intr, 1);
+
+	return 0;
+}
+
+int
+qtest_intr_disable(void *data)
+{
+	struct virtio_hw *hw = ((struct rte_eth_dev_data *)data)->dev_private;
+	struct qtest_session *s;
+
+	s = (struct qtest_session *)hw->qsession;
+	rte_atomic16_set(&s->enable_intr, 0);
+
+	return 0;
+}
+
+void
+qtest_intr_callback_register(void *data,
+		rte_intr_callback_fn cb, void *cb_arg)
+{
+	struct virtio_hw *hw = ((struct rte_eth_dev_data *)data)->dev_private;
+	struct qtest_session *s;
+
+	s = (struct qtest_session *)hw->qsession;
+	s->cb = cb;
+	s->cb_arg = cb_arg;
+	rte_atomic16_set(&s->enable_intr, 1);
+}
+
+void
+qtest_intr_callback_unregister(void *data,
+		rte_intr_callback_fn cb __rte_unused,
+		void *cb_arg __rte_unused)
+{
+	struct virtio_hw *hw = ((struct rte_eth_dev_data *)data)->dev_private;
+	struct qtest_session *s;
+
+	s = (struct qtest_session *)hw->qsession;
+	rte_atomic16_set(&s->enable_intr, 0);
+	s->cb = NULL;
+	s->cb_arg = NULL;
+}
+
+static void *
+qtest_intr_handler(void *data) {
+	struct qtest_session *s = (struct qtest_session *)data;
+	eventfd_t value;
+	int ret;
+
+	for (;;) {
+		ret = eventfd_read(s->eventfd, &value);
+		if (ret < 0)
+			return NULL;
+		s->cb(NULL, s->cb_arg);
+	}
+	return NULL;
+}
+
+static int
+qtest_intr_initialize(void *data)
+{
+	struct virtio_hw *hw = ((struct rte_eth_dev_data *)data)->dev_private;
+	struct qtest_session *s;
+	char buf[64];
+	int ret;
+
+	s = (struct qtest_session *)hw->qsession;
+
+	/* This message will come when interrupt occurs */
+	snprintf(interrupt_message, sizeof(interrupt_message),
+			"IRQ raise %d", VIRTIO_NET_IRQ_NUM);
+
+	snprintf(buf, sizeof(buf), "irq_intercept_in ioapic\n");
+
+	if (pthread_mutex_lock(&s->qtest_session_lock) < 0)
+		rte_panic("Cannot lock mutex\n");
+
+	/* To enable interrupt, send "irq_intercept_in" message to QEMU */
+	ret = qtest_raw_send(s->qtest_socket, buf, strlen(buf));
+	if (ret < 0) {
+		pthread_mutex_unlock(&s->qtest_session_lock);
+		return -1;
+	}
+
+	/* just ignore QEMU response */
+	ret = qtest_raw_recv(s->msgfds.readfd, buf, sizeof(buf));
+	if (ret < 0) {
+		pthread_mutex_unlock(&s->qtest_session_lock);
+		return -1;
+	}
+
+	if (pthread_mutex_unlock(&s->qtest_session_lock) < 0)
+		rte_panic("Cannot lock mutex\n");
+
+	return 0;
+}
+
+static void
+qtest_event_send(struct qtest_session *s, char *buf)
+{
+	int ret;
+
+	if (strncmp(buf, interrupt_message, strlen(interrupt_message)) == 0) {
+		if (rte_atomic16_read(&s->enable_intr) == 0)
+			return;
+
+		/* relay interrupt to eventfd */
+		ret = eventfd_write(s->eventfd, 1);
+		if (ret < 0)
+			rte_panic("cannot relay interrupt\n");
+	} else {
+		/* relay normal message to pipe */
+		ret = qtest_raw_send(s->msgfds.writefd, buf, strlen(buf));
+		if (ret < 0)
+			rte_panic("cannot relay normal message\n");
+	}
+}
+
+static void
+qtest_close_one_socket(int *fd)
+{
+	if (*fd > 0) {
+		close(*fd);
+		*fd = -1;
+	}
+}
+
+static void
+qtest_close_sockets(struct qtest_session *s)
+{
+	qtest_close_one_socket(&s->qtest_socket);
+	qtest_close_one_socket(&s->msgfds.readfd);
+	qtest_close_one_socket(&s->msgfds.writefd);
+	qtest_close_one_socket(&s->eventfd);
+	qtest_close_one_socket(&s->ivshmem_socket);
+}
+
+static void
+qtest_event_enqueue(struct qtest_session *s, char *buf)
+{
+	size_t len = strlen(buf);
+	char *dest;
+
+	if (s->evq == NULL) {
+		/* allocate one more byte for '\0' */
+		s->evq = malloc(len + 1);
+		if (s->evq == NULL)
+			rte_panic("Cannot allocate memory\n");
+
+		s->evq_dequeue_ptr = s->evq;
+		s->evq_total_len = len + 1;
+		dest = s->evq;
+	} else {
+		size_t offset = s->evq_dequeue_ptr - s->evq;
+
+		s->evq = realloc(s->evq, s->evq_total_len + len);
+		if (s->evq == NULL)
+			rte_panic("Cannot re-allocate memory\n");
+
+		s->evq_dequeue_ptr = s->evq + offset;
+		dest = s->evq + s->evq_total_len - 1;
+		s->evq_total_len += len;
+	}
+
+	strncpy(dest, buf, len);
+	dest[len] = '\0';
+}
+
+static char *
+qtest_event_dequeue(struct qtest_session *s)
+{
+	char *head, *next_head;
+
+	head = s->evq_dequeue_ptr;
+
+	/* make sure message is terminated by '\n' */
+	next_head = strchr(s->evq_dequeue_ptr, '\n');
+	if (next_head == NULL)
+		return NULL;
+
+	/* set next dequeue pointer */
+	s->evq_dequeue_ptr = next_head + 1;
+
+	return head;
+}
+
+static void
+qtest_event_flush(struct qtest_session *s)
+{
+	if (s->evq) {
+		free(s->evq);
+		s->evq = NULL;
+		s->evq_dequeue_ptr = NULL;
+		s->evq_total_len = 0;
+	}
+}
+
+/*
+ * This thread relays QTest response using pipe and eventfd.
+ * The function is needed because we need to separate IRQ message from others.
+ */
+static void *
+qtest_event_handler(void *data) {
+	struct qtest_session *s = (struct qtest_session *)data;
+	char buf[64];
+	char *p;
+	int ret;
+
+	for (;;) {
+		memset(buf, 0, sizeof(buf));
+		ret = qtest_raw_recv(s->qtest_socket, buf, sizeof(buf));
+		if (ret <= 0) {
+			PMD_DRV_LOG(EMERG,
+				"Port %u: qtest connection was closed.\n"
+				"Please detach the port, then start QEMU "
+				"and attach the port again.\n",
+				s->eth_data->port_id);
+			qtest_close_sockets(s);
+			qtest_event_flush(s);
+			return NULL;
+		}
+
+		qtest_event_enqueue(s, buf);
+
+		/* in the case of incomplete message, receive again */
+		p = &buf[sizeof(buf) - 1];
+		if ((*p != '\0') && (*p != '\n'))
+			continue;
+
+		/* may receive multiple messages at the same time */
+		while ((p = qtest_event_dequeue(s)) != NULL)
+			qtest_event_send(s, p);
+
+		qtest_event_flush(s);
+	}
+	return NULL;
+}
+
+static int
+qtest_init_piix3_device(struct qtest_session *s, struct qtest_pci_device *dev)
+{
+	uint8_t bus, device, virtio_net_slot = 0;
+	struct qtest_pci_device *tmpdev;
+	uint8_t pcislot2regaddr[] = {	0xff,
+					0xff,
+					0xff,
+					PIIX3_REG_ADDR_PIRQC,
+					PIIX3_REG_ADDR_PIRQD,
+					PIIX3_REG_ADDR_PIRQA,
+					PIIX3_REG_ADDR_PIRQB};
+
+	bus = dev->bus_addr;
+	device = dev->device_addr;
+
+	PMD_DRV_LOG(INFO,
+		"Find %s on virtual PCI bus: %04x:%02x:00.0\n",
+		dev->name, bus, device);
+
+	/* Get slot id that is connected to virtio-net */
+	TAILQ_FOREACH(tmpdev, &s->head, next) {
+		if (strcmp(tmpdev->name, "virtio-net") == 0) {
+			virtio_net_slot = tmpdev->device_addr;
+			break;
+		}
+	}
+
+	if (virtio_net_slot == 0)
+		return -1;
+
+	/*
+	 * Set interrupt routing for virtio-net device.
+	 * Here is i440fx/piix3 connection settings
+	 * ---------------------------------------
+	 * PCI Slot3 -> PIRQC
+	 * PCI Slot4 -> PIRQD
+	 * PCI Slot5 -> PIRQA
+	 * PCI Slot6 -> PIRQB
+	 */
+	if (pcislot2regaddr[virtio_net_slot] != 0xff) {
+		qtest_pci_outb(s, bus, device, 0,
+				pcislot2regaddr[virtio_net_slot],
+				VIRTIO_NET_IRQ_NUM);
+	}
+
+	return 0;
+}
+
+/*
+ * Common initialization of PCI device.
+ * To know detail, see pci specification.
+ */
+static int
+qtest_init_pci_device(struct qtest_session *s, struct qtest_pci_device *dev)
+{
+	uint8_t i, bus, device;
+	uint32_t val;
+	uint64_t val64;
+
+	bus = dev->bus_addr;
+	device = dev->device_addr;
+
+	PMD_DRV_LOG(INFO,
+		"Find %s on virtual PCI bus: %04x:%02x:00.0\n",
+		dev->name, bus, device);
+
+	/* Check header type */
+	val = qtest_pci_inb(s, bus, device, 0, PCI_HEADER_TYPE);
+	if (val != PCI_HEADER_TYPE_NORMAL) {
+		PMD_DRV_LOG(ERR, "Unexpected header type %d\n", val);
+		return -1;
+	}
+
+	/* Check BAR type */
+	for (i = 0; i < NB_BAR; i++) {
+		val = qtest_pci_inl(s, bus, device, 0, dev->bar[i].addr);
+
+		switch (dev->bar[i].type) {
+		case QTEST_PCI_BAR_IO:
+			if ((val & 0x1) != PCI_BASE_ADDRESS_SPACE_IO)
+				dev->bar[i].type = QTEST_PCI_BAR_DISABLE;
+			break;
+		case QTEST_PCI_BAR_MEMORY_UNDER_1MB:
+			if ((val & 0x1) != PCI_BASE_ADDRESS_SPACE_MEMORY)
+				dev->bar[i].type = QTEST_PCI_BAR_DISABLE;
+			if ((val & 0x6) != PCI_BASE_ADDRESS_MEM_TYPE_1M)
+				dev->bar[i].type = QTEST_PCI_BAR_DISABLE;
+			break;
+		case QTEST_PCI_BAR_MEMORY_32:
+			if ((val & 0x1) != PCI_BASE_ADDRESS_SPACE_MEMORY)
+				dev->bar[i].type = QTEST_PCI_BAR_DISABLE;
+			if ((val & 0x6) != PCI_BASE_ADDRESS_MEM_TYPE_32)
+				dev->bar[i].type = QTEST_PCI_BAR_DISABLE;
+			break;
+		case QTEST_PCI_BAR_MEMORY_64:
+
+			if ((val & 0x1) != PCI_BASE_ADDRESS_SPACE_MEMORY)
+				dev->bar[i].type = QTEST_PCI_BAR_DISABLE;
+			if ((val & 0x6) != PCI_BASE_ADDRESS_MEM_TYPE_64)
+				dev->bar[i].type = QTEST_PCI_BAR_DISABLE;
+			break;
+		case QTEST_PCI_BAR_DISABLE:
+			break;
+		}
+	}
+
+	/* Enable device */
+	val = qtest_pci_inl(s, bus, device, 0, PCI_COMMAND);
+	val |= PCI_COMMAND_IO | PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER;
+	qtest_pci_outl(s, bus, device, 0, PCI_COMMAND, val);
+
+	/* Calculate BAR size */
+	for (i = 0; i < NB_BAR; i++) {
+		switch (dev->bar[i].type) {
+		case QTEST_PCI_BAR_IO:
+		case QTEST_PCI_BAR_MEMORY_UNDER_1MB:
+		case QTEST_PCI_BAR_MEMORY_32:
+			qtest_pci_outl(s, bus, device, 0,
+					dev->bar[i].addr, 0xffffffff);
+			val = qtest_pci_inl(s, bus, device,
+					0, dev->bar[i].addr);
+			dev->bar[i].region_size = ~(val & 0xfffffff0) + 1;
+			break;
+		case QTEST_PCI_BAR_MEMORY_64:
+			qtest_pci_outq(s, bus, device, 0,
+					dev->bar[i].addr, 0xffffffffffffffff);
+			val64 = qtest_pci_inq(s, bus, device,
+					0, dev->bar[i].addr);
+			dev->bar[i].region_size =
+					~(val64 & 0xfffffffffffffff0) + 1;
+			break;
+		case QTEST_PCI_BAR_DISABLE:
+			break;
+		}
+	}
+
+	/* Set BAR region */
+	for (i = 0; i < NB_BAR; i++) {
+		switch (dev->bar[i].type) {
+		case QTEST_PCI_BAR_IO:
+		case QTEST_PCI_BAR_MEMORY_UNDER_1MB:
+		case QTEST_PCI_BAR_MEMORY_32:
+			qtest_pci_outl(s, bus, device, 0, dev->bar[i].addr,
+				dev->bar[i].region_start);
+			PMD_DRV_LOG(INFO, "Set BAR of %s device: 0x%lx - 0x%lx\n",
+				dev->name, dev->bar[i].region_start,
+				dev->bar[i].region_start + dev->bar[i].region_size);
+			break;
+		case QTEST_PCI_BAR_MEMORY_64:
+			qtest_pci_outq(s, bus, device, 0, dev->bar[i].addr,
+				dev->bar[i].region_start);
+			PMD_DRV_LOG(INFO, "Set BAR of %s device: 0x%lx - 0x%lx\n",
+				dev->name, dev->bar[i].region_start,
+				dev->bar[i].region_start + dev->bar[i].region_size);
+			break;
+		case QTEST_PCI_BAR_DISABLE:
+			break;
+		}
+	}
+
+	return 0;
+}
+
+static int
+qtest_find_pci_device(struct qtest_session *s, const char *name,
+			struct rte_pci_addr *addr)
+{
+	struct qtest_pci_device *dev;
+	uint32_t val;
+
+	PMD_DRV_LOG(INFO, "PCI address of %s is %04x:%02x:%02x.%02x\n", name,
+			addr->domain, addr->bus, addr->devid, addr->function);
+
+	dev = qtest_find_device(s, name);
+	if (dev == NULL)
+		goto error;
+
+	val = qtest_pci_inl(s, addr->bus, addr->devid, addr->function, 0);
+	if (val == ((uint32_t)dev->device_id << 16 | dev->vendor_id)) {
+		dev->bus_addr = addr->bus;
+		dev->device_addr = addr->devid;
+		return 0;
+	}
+
+error:
+	PMD_DRV_LOG(ERR, "%s isn' found on %04x:%02x:%02x.%02x\n", name,
+			addr->domain, addr->bus, addr->devid, addr->function);
+	return -1;
+}
+
+static int
+qtest_init_pci_devices(struct qtest_session *s,
+			struct rte_pci_addr *virtio_addr,
+			struct rte_pci_addr *ivshmem_addr,
+			struct rte_pci_addr *piix3_addr)
+{
+	struct qtest_pci_device *dev;
+	int ret;
+
+
+	/* Try to find devices */
+	ret = qtest_find_pci_device(s, "virtio-net", virtio_addr);
+	if (ret < 0)
+		return -1;
+
+	ret = qtest_find_pci_device(s, "ivshmem", ivshmem_addr);
+	if (ret < 0)
+		return -1;
+
+	ret = qtest_find_pci_device(s, "piix3", piix3_addr);
+	if (ret < 0)
+		return -1;
+
+	/* Initialize devices */
+	TAILQ_FOREACH(dev, &s->head, next) {
+		ret = dev->init(s, dev);
+		if (ret != 0)
+			return ret;
+	}
+
+	return 0;
+}
+
+struct rte_pci_id
+qtest_get_pci_id_of_virtio_net(void)
+{
+	struct rte_pci_id id =  {VIRTIO_NET_DEVICE_ID,
+		VIRTIO_NET_VENDOR_ID, PCI_ANY_ID, PCI_ANY_ID};
+
+	return id;
+}
+
+static int
+qtest_register_target_devices(struct qtest_session *s)
+{
+	struct qtest_pci_device *virtio_net, *ivshmem, *piix3;
+	const struct rte_memseg *ms;
+
+	ms = rte_eal_get_physmem_layout();
+	/* if EAL memory size isn't pow of 2, ivshmem will refuse it */
+	if ((ms[0].len & (ms[0].len - 1)) != 0) {
+		PMD_DRV_LOG(ERR, "memory size must be power of 2\n");
+		return -1;
+	}
+
+	virtio_net = malloc(sizeof(*virtio_net));
+	if (virtio_net == NULL)
+		return -1;
+
+	ivshmem = malloc(sizeof(*ivshmem));
+	if (ivshmem == NULL)
+		return -1;
+
+	piix3 = malloc(sizeof(*piix3));
+	if (piix3 == NULL)
+		return -1;
+
+	memset(virtio_net, 0, sizeof(*virtio_net));
+	memset(ivshmem, 0, sizeof(*ivshmem));
+
+	TAILQ_INIT(&s->head);
+
+	virtio_net->name = "virtio-net";
+	virtio_net->device_id = VIRTIO_NET_DEVICE_ID;
+	virtio_net->vendor_id = VIRTIO_NET_VENDOR_ID;
+	virtio_net->init = qtest_init_pci_device;
+	virtio_net->bar[0].addr = PCI_BASE_ADDRESS_0;
+	virtio_net->bar[0].type = QTEST_PCI_BAR_IO;
+	virtio_net->bar[0].region_start = VIRTIO_NET_IO_START;
+	virtio_net->bar[1].addr = PCI_BASE_ADDRESS_1;
+	virtio_net->bar[1].type = QTEST_PCI_BAR_MEMORY_32;
+	virtio_net->bar[1].region_start = VIRTIO_NET_MEMORY1_START;
+	virtio_net->bar[4].addr = PCI_BASE_ADDRESS_4;
+	virtio_net->bar[4].type = QTEST_PCI_BAR_MEMORY_64;
+	virtio_net->bar[4].region_start = VIRTIO_NET_MEMORY2_START;
+	TAILQ_INSERT_TAIL(&s->head, virtio_net, next);
+
+	ivshmem->name = "ivshmem";
+	ivshmem->device_id = IVSHMEM_DEVICE_ID;
+	ivshmem->vendor_id = IVSHMEM_VENDOR_ID;
+	ivshmem->init = qtest_init_pci_device;
+	ivshmem->bar[0].addr = PCI_BASE_ADDRESS_0;
+	ivshmem->bar[0].type = QTEST_PCI_BAR_MEMORY_32;
+	ivshmem->bar[0].region_start = IVSHMEM_MEMORY_START;
+	ivshmem->bar[2].addr = PCI_BASE_ADDRESS_2;
+	ivshmem->bar[2].type = QTEST_PCI_BAR_MEMORY_64;
+	/* In host mode, only one memory segment is vaild */
+	ivshmem->bar[2].region_start = (uint64_t)ms[0].addr;
+	TAILQ_INSERT_TAIL(&s->head, ivshmem, next);
+
+	/* piix3 is needed to route irqs from virtio-net to ioapic */
+	piix3->name = "piix3";
+	piix3->device_id = PIIX3_DEVICE_ID;
+	piix3->vendor_id = PIIX3_VENDOR_ID;
+	piix3->init = qtest_init_piix3_device;
+	TAILQ_INSERT_TAIL(&s->head, piix3, next);
+
+	return 0;
+}
+
+static int
+qtest_send_message_to_ivshmem(int sock_fd, uint64_t client_id, int shm_fd)
+{
+	struct iovec iov;
+	struct msghdr msgh;
+	size_t fdsize = sizeof(int);
+	char control[CMSG_SPACE(fdsize)];
+	struct cmsghdr *cmsg;
+	int ret;
+
+	memset(&msgh, 0, sizeof(msgh));
+	iov.iov_base = &client_id;
+	iov.iov_len = sizeof(client_id);
+
+	msgh.msg_iov = &iov;
+	msgh.msg_iovlen = 1;
+
+	if (shm_fd >= 0) {
+		msgh.msg_control = &control;
+		msgh.msg_controllen = sizeof(control);
+		cmsg = CMSG_FIRSTHDR(&msgh);
+		cmsg->cmsg_len = CMSG_LEN(fdsize);
+		cmsg->cmsg_level = SOL_SOCKET;
+		cmsg->cmsg_type = SCM_RIGHTS;
+		memcpy(CMSG_DATA(cmsg), &shm_fd, fdsize);
+	}
+
+	do {
+		ret = sendmsg(sock_fd, &msgh, 0);
+	} while (ret < 0 && errno == EINTR);
+
+	if (ret < 0) {
+		PMD_DRV_LOG(ERR, "sendmsg error\n");
+		return ret;
+	}
+
+	return ret;
+}
+
+static int
+qtest_setup_shared_memory(struct qtest_session *s)
+{
+	int shm_fd, num, ret;
+	struct back_file *huges;
+
+	num = rte_eal_get_backfile_info(&huges);
+	if (num != 1) {
+		PMD_DRV_LOG(ERR,
+			"Not supported memory configuration\n");
+		return -1;
+	}
+
+	shm_fd = open(huges[0].filepath, O_RDWR);
+	if (shm_fd < 0) {
+		PMD_DRV_LOG(ERR,
+			"Cannot open file: %s\n", huges[0].filepath);
+		return -1;
+	}
+
+	/* send our protocol version first */
+	ret = qtest_send_message_to_ivshmem(s->ivshmem_socket,
+			IVSHMEM_PROTOCOL_VERSION, -1);
+	if (ret < 0) {
+		PMD_DRV_LOG(ERR,
+			"Failed to send protocol version to ivshmem\n");
+		return -1;
+	}
+
+	/* send client id */
+	ret = qtest_send_message_to_ivshmem(s->ivshmem_socket, 0, -1);
+	if (ret < 0) {
+		PMD_DRV_LOG(ERR, "Failed to send VMID to ivshmem\n");
+		return -1;
+	}
+
+	/* send message to ivshmem */
+	ret = qtest_send_message_to_ivshmem(s->ivshmem_socket, -1, shm_fd);
+	if (ret < 0) {
+		PMD_DRV_LOG(ERR, "Failed to file descriptor to ivshmem\n");
+		return -1;
+	}
+
+	close(shm_fd);
+
+	return 0;
+}
+
+int
+qtest_vdev_init(struct rte_eth_dev_data *data,
+		int qtest_socket, int ivshmem_socket,
+		struct rte_pci_addr *virtio_addr,
+		struct rte_pci_addr *ivshmem_addr,
+		struct rte_pci_addr *piix3_addr)
+{
+	struct virtio_hw *hw = ((struct rte_eth_dev_data *)data)->dev_private;
+	struct qtest_session *s;
+	int ret;
+
+	s = rte_zmalloc(NULL, sizeof(*s), RTE_CACHE_LINE_SIZE);
+
+	ret = pipe(s->msgfds.pipefd);
+	if (ret != 0) {
+		PMD_DRV_LOG(ERR, "Failed to initialize message pipe\n");
+		return -1;
+	}
+
+	s->eventfd = eventfd(0, 0);
+	if (s->eventfd < 0) {
+		PMD_DRV_LOG(ERR, "Failed to open eventfd\n");
+		return -1;
+	}
+
+	ret = qtest_register_target_devices(s);
+	if (ret != 0) {
+		PMD_DRV_LOG(ERR, "Failed to initialize qtest session\n");
+		return -1;
+	}
+
+	ret = pthread_mutex_init(&s->qtest_session_lock, NULL);
+	if (ret != 0) {
+		PMD_DRV_LOG(ERR, "Failed to initialize mutex\n");
+		return -1;
+	}
+
+	rte_atomic16_set(&s->enable_intr, 0);
+	s->qtest_socket = qtest_socket;
+	s->ivshmem_socket = ivshmem_socket;
+	s->eth_data = data;
+	hw->qsession = (void *)s;
+
+	ret = pthread_create(&s->event_th, NULL, qtest_event_handler, s);
+	if (ret != 0) {
+		PMD_DRV_LOG(ERR, "Failed to create event handler\n");
+		return -1;
+	}
+
+	ret = pthread_create(&s->intr_th, NULL, qtest_intr_handler, s);
+	if (ret != 0) {
+		PMD_DRV_LOG(ERR, "Failed to create interrupt handler\n");
+		return -1;
+	}
+
+	ret = qtest_intr_initialize(data);
+	if (ret != 0) {
+		PMD_DRV_LOG(ERR, "Failed to initialize interrupt\n");
+		return -1;
+	}
+
+	ret = qtest_setup_shared_memory(s);
+	if (ret != 0) {
+		PMD_DRV_LOG(ERR, "Failed to setup shared memory\n");
+		return -1;
+	}
+
+	ret = qtest_init_pci_devices(s, virtio_addr, ivshmem_addr, piix3_addr);
+	if (ret != 0) {
+		PMD_DRV_LOG(ERR, "Failed to initialize devices\n");
+		return -1;
+	}
+
+	return 0;
+}
+
+static void
+qtest_remove_target_devices(struct qtest_session *s)
+{
+	struct qtest_pci_device *dev, *next;
+
+	for (dev = TAILQ_FIRST(&s->head); dev != NULL; dev = next) {
+		next = TAILQ_NEXT(dev, next);
+		TAILQ_REMOVE(&s->head, dev, next);
+		free(dev);
+	}
+}
+
+void
+qtest_vdev_uninit(struct rte_eth_dev_data *data)
+{
+	struct virtio_hw *hw = ((struct rte_eth_dev_data *)data)->dev_private;
+	struct qtest_session *s;
+
+	s = (struct qtest_session *)hw->qsession;
+
+	qtest_close_sockets(s);
+	qtest_event_flush(s);
+
+	pthread_cancel(s->event_th);
+	pthread_join(s->event_th, NULL);
+
+	pthread_cancel(s->intr_th);
+	pthread_join(s->intr_th, NULL);
+
+	pthread_mutex_destroy(&s->qtest_session_lock);
+
+	qtest_remove_target_devices(s);
+
+	rte_free(s);
+	hw->qsession = NULL;
+}
diff --git a/drivers/net/virtio/qtest.h b/drivers/net/virtio/qtest.h
new file mode 100644
index 0000000..965e985
--- /dev/null
+++ b/drivers/net/virtio/qtest.h
@@ -0,0 +1,65 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2016 IGEL Co., Ltd. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of IGEL Co., Ltd. nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _VIRTIO_QTEST_H_
+#define _VIRTIO_QTEST_H_
+
+#include "virtio_pci.h"
+
+#define QTEST_DRV_NAME		"eth_qtest_virtio"
+
+int qtest_vdev_init(struct rte_eth_dev_data *data, int qtest_socket,
+		int ivshmem_socket, struct rte_pci_addr *virtio_addr,
+		struct rte_pci_addr *ivshmem_addr,
+		struct rte_pci_addr *piix3_addr);
+void qtest_vdev_uninit(struct rte_eth_dev_data *data);
+void qtest_intr_callback_register(void *data,
+		rte_intr_callback_fn cb, void *cb_arg);
+void qtest_intr_callback_unregister(void *data,
+		rte_intr_callback_fn cb, void *cb_arg);
+int qtest_intr_enable(void *data);
+int qtest_intr_disable(void *data);
+struct rte_pci_id qtest_get_pci_id_of_virtio_net(void);
+
+uint32_t qtest_in(struct virtio_hw *, uint16_t, char type);
+void qtest_out(struct virtio_hw *, uint16_t, uint64_t, char type);
+uint32_t qtest_read(struct virtio_hw *, uint64_t, char type);
+void qtest_write(struct virtio_hw *, uint64_t, uint64_t, char type);
+int qtest_read_pci_cfg(struct virtio_hw *hw, const char *name,
+		void *buf, size_t len, off_t offset);
+int qtest_get_bar_addr(struct virtio_hw *hw, const char *name,
+		uint8_t bar, uint64_t **addr);
+int qtest_get_bar_size(struct virtio_hw *hw, const char *name,
+		uint8_t bar, uint64_t *size);
+
+#endif /* _VIRTIO_QTEST_H_ */
diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c
index c3e877a..4d3df85 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -36,6 +36,10 @@
 #include <stdio.h>
 #include <errno.h>
 #include <unistd.h>
+#ifdef RTE_VIRTIO_VDEV_QTEST
+#include <sys/socket.h>
+#include <sys/un.h>
+#endif
 
 #include <rte_ethdev.h>
 #include <rte_memcpy.h>
@@ -60,6 +64,9 @@
 #include "virtqueue.h"
 #include "virtio_rxtx.h"
 
+#ifdef RTE_VIRTIO_VDEV_QTEST
+#include "qtest.h"
+#endif
 
 static int eth_virtio_dev_init(struct rte_eth_dev *eth_dev);
 static int eth_virtio_dev_uninit(struct rte_eth_dev *eth_dev);
@@ -387,7 +394,7 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev,
 			return -ENOMEM;
 		}
 	}
-#ifdef RTE_VIRTIO_VDEV
+#if defined(RTE_VIRTIO_VDEV) || defined(RTE_VIRTIO_VDEV_QTEST)
 	else
 		vq->vq_ring_mem = (phys_addr_t)mz->addr; /* Use vaddr!!! */
 #endif
@@ -431,7 +438,7 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev,
 
 		if (virtio_dev_check(dev, RTE_ETH_DEV_PCI, NULL, 0))
 			vq->virtio_net_hdr_mem = mz->phys_addr;
-#ifdef RTE_VIRTIO_VDEV
+#if defined(RTE_VIRTIO_VDEV) || defined(RTE_VIRTIO_VDEV_QTEST)
 		else
 			vq->virtio_net_hdr_mem = (phys_addr_t)mz->addr;
 #endif
@@ -441,7 +448,7 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev,
 
 	if (virtio_dev_check(dev, RTE_ETH_DEV_PCI, NULL, 0))
 		vq->offset = offsetof(struct rte_mbuf, buf_physaddr);
-#ifdef RTE_VIRTIO_VDEV
+#if defined(RTE_VIRTIO_VDEV) || defined(RTE_VIRTIO_VDEV_QTEST)
 	else
 		vq->offset = offsetof(struct rte_mbuf, buf_addr);
 #endif
@@ -999,6 +1006,23 @@ virtio_interrupt_handler(__rte_unused struct rte_intr_handle *handle,
 	isr = vtpci_isr(hw);
 	PMD_DRV_LOG(INFO, "interrupt status = %#x", isr);
 
+#ifdef RTE_VIRTIO_VDEV_QTEST
+	if (virtio_dev_check(dev, RTE_ETH_DEV_VIRTUAL, QTEST_DRV_NAME, 0)) {
+		if (qtest_intr_enable(dev->data) < 0)
+			PMD_DRV_LOG(ERR, "interrupt enable failed");
+		/*
+		 * If last qtest message is interrupt, 'isr' will be 0
+		 * becasue socket has been closed already.
+		 * But still we want to notice this event to EAL.
+		 * So just ignore isr value.
+		 */
+		if (virtio_dev_link_update(dev, 0) == 0)
+			_rte_eth_dev_callback_process(dev,
+					RTE_ETH_EVENT_INTR_LSC);
+		return;
+	}
+#endif
+
 	if (virtio_dev_check(dev, RTE_ETH_DEV_PCI, NULL, 0))
 		if (rte_intr_enable(&dev->pci_dev->intr_handle) < 0)
 			PMD_DRV_LOG(ERR, "interrupt enable failed");
@@ -1055,9 +1079,16 @@ eth_virtio_dev_init(struct rte_eth_dev *eth_dev)
 	pci_dev = eth_dev->pci_dev;
 
 	if (virtio_dev_check(eth_dev, RTE_ETH_DEV_PCI, NULL, 0)) {
-		if (vtpci_init(pci_dev, hw) < 0)
+		if (vtpci_init(eth_dev, hw) < 0)
 			return -1;
 	}
+#ifdef RTE_VIRTIO_VDEV_QTEST
+	else if (virtio_dev_check(eth_dev,
+				RTE_ETH_DEV_VIRTUAL, QTEST_DRV_NAME, 0)) {
+		if (vtpci_init(eth_dev, hw) < 0)
+			return -1;
+	}
+#endif
 
 	/* Reset the device although not necessary at startup */
 	vtpci_reset(hw);
@@ -1077,6 +1108,13 @@ eth_virtio_dev_init(struct rte_eth_dev *eth_dev)
 
 		rte_eth_copy_pci_info(eth_dev, pci_dev);
 	}
+#ifdef RTE_VIRTIO_VDEV_QTEST
+	else if (virtio_dev_check(eth_dev,
+				RTE_ETH_DEV_VIRTUAL, QTEST_DRV_NAME, 0)) {
+		if (!vtpci_with_feature(hw, VIRTIO_NET_F_STATUS))
+			pci_dev->driver->drv_flags &= ~RTE_PCI_DRV_INTR_LSC;
+	}
+#endif
 
 	rx_func_get(eth_dev);
 
@@ -1165,6 +1203,26 @@ eth_virtio_dev_init(struct rte_eth_dev *eth_dev)
 						   virtio_interrupt_handler,
 						   eth_dev);
 	}
+#ifdef RTE_VIRTIO_VDEV_QTEST
+	else if (virtio_dev_check(eth_dev, RTE_ETH_DEV_VIRTUAL,
+				QTEST_DRV_NAME, 0)) {
+		struct rte_pci_id id;
+
+		id = qtest_get_pci_id_of_virtio_net();
+		RTE_SET_USED(id);
+
+		PMD_INIT_LOG(DEBUG, "port %d vendorID=0x%x deviceID=0x%x",
+				eth_dev->data->port_id,
+				id.vendor_id, id.device_id);
+
+		/* Setup interrupt callback  */
+		if (virtio_dev_check(eth_dev, RTE_ETH_DEV_VIRTUAL,
+					NULL, RTE_ETH_DEV_INTR_LSC))
+			qtest_intr_callback_register(eth_dev->data,
+					virtio_interrupt_handler, eth_dev);
+	}
+#endif
+
 	virtio_dev_cq_start(eth_dev);
 
 	return 0;
@@ -1202,7 +1260,15 @@ eth_virtio_dev_uninit(struct rte_eth_dev *eth_dev)
 					     virtio_interrupt_handler,
 					     eth_dev);
 
-	rte_eal_pci_unmap_device(pci_dev);
+#ifdef RTE_VIRTIO_VDEV_QTEST
+	else if (virtio_dev_check(eth_dev, RTE_ETH_DEV_VIRTUAL,
+				QTEST_DRV_NAME, RTE_ETH_DEV_INTR_LSC))
+		qtest_intr_callback_unregister(eth_dev->data,
+				virtio_interrupt_handler, eth_dev);
+#endif
+
+	if (virtio_dev_check(eth_dev, RTE_ETH_DEV_PCI, NULL, 0))
+		rte_eal_pci_unmap_device(pci_dev);
 
 	PMD_INIT_LOG(DEBUG, "dev_uninit completed");
 
@@ -1284,16 +1350,34 @@ virtio_dev_start(struct rte_eth_dev *dev)
 
 	/* check if lsc interrupt feature is enabled */
 	if (dev->data->dev_conf.intr_conf.lsc) {
-		if (!virtio_dev_check(dev, RTE_ETH_DEV_PCI,
-					NULL, RTE_PCI_DRV_INTR_LSC)) {
+		int pdev_has_lsc = 0, vdev_has_lsc = 0;
+
+		pdev_has_lsc = virtio_dev_check(dev, RTE_ETH_DEV_PCI,
+				NULL, RTE_PCI_DRV_INTR_LSC);
+#ifdef RTE_VIRTIO_VDEV_QTEST
+		vdev_has_lsc = virtio_dev_check(dev, RTE_ETH_DEV_VIRTUAL,
+				QTEST_DRV_NAME, RTE_ETH_DEV_INTR_LSC);
+#endif
+
+		if ((!pdev_has_lsc) && (!vdev_has_lsc)) {
 			PMD_DRV_LOG(ERR, "link status not supported by host");
 			return -ENOTSUP;
 		}
 
-		if (rte_intr_enable(&dev->pci_dev->intr_handle) < 0) {
-			PMD_DRV_LOG(ERR, "interrupt enable failed");
-			return -EIO;
+		if (pdev_has_lsc) {
+			if (rte_intr_enable(&dev->pci_dev->intr_handle) < 0) {
+				PMD_DRV_LOG(ERR, "interrupt enable failed");
+				return -EIO;
+			}
 		}
+#ifdef RTE_VIRTIO_VDEV_QTEST
+		else if (vdev_has_lsc) {
+			if (qtest_intr_enable(dev->data) < 0) {
+				PMD_DRV_LOG(ERR, "interrupt enable failed");
+				return -EIO;
+			}
+		}
+#endif
 	}
 
 	/* Initialize Link state */
@@ -1390,8 +1474,15 @@ virtio_dev_stop(struct rte_eth_dev *dev)
 
 	PMD_INIT_LOG(DEBUG, "stop");
 
-	if (dev->data->dev_conf.intr_conf.lsc)
-		rte_intr_disable(&dev->pci_dev->intr_handle);
+	if (dev->data->dev_conf.intr_conf.lsc) {
+		if (virtio_dev_check(dev, RTE_ETH_DEV_PCI, NULL, 0))
+			rte_intr_disable(&dev->pci_dev->intr_handle);
+#ifdef RTE_VIRTIO_VDEV_QTEST
+		else if (virtio_dev_check(dev, RTE_ETH_DEV_VIRTUAL,
+					QTEST_DRV_NAME, 0))
+			qtest_intr_disable(dev->data);
+#endif
+	}
 
 	memset(&link, 0, sizeof(link));
 	virtio_dev_atomic_write_link_status(dev, &link);
@@ -1628,3 +1719,271 @@ static struct rte_driver rte_cvio_driver = {
 PMD_REGISTER_DRIVER(rte_cvio_driver);
 
 #endif
+
+#ifdef RTE_VIRTIO_VDEV_QTEST
+
+#define ETH_VIRTIO_NET_ARG_QTEST_PATH           "qtest"
+#define ETH_VIRTIO_NET_ARG_IVSHMEM_PATH         "ivshmem"
+#define ETH_VIRTIO_NET_ARG_VIRTIO_NET_ADDR      "virtio-net-addr"
+#define ETH_VIRTIO_NET_ARG_IVSHMEM_ADDR         "ivshmem-addr"
+#define ETH_VIRTIO_NET_ARG_PIIX3_ADDR           "piix3-addr"
+
+static const char *valid_qtest_args[] = {
+       ETH_VIRTIO_NET_ARG_QTEST_PATH,
+       ETH_VIRTIO_NET_ARG_IVSHMEM_PATH,
+       ETH_VIRTIO_NET_ARG_VIRTIO_NET_ADDR,
+       ETH_VIRTIO_NET_ARG_IVSHMEM_ADDR,
+       ETH_VIRTIO_NET_ARG_PIIX3_ADDR,
+       NULL
+};
+
+static int
+get_socket_path_arg(const char *key __rte_unused,
+               const char *value, void *extra_args)
+{
+       int ret, fd, loop = 100;
+       int *pfd = extra_args;
+       struct sockaddr_un sa = {0};
+
+       if ((value == NULL) || (extra_args == NULL))
+               return -EINVAL;
+
+       fd = socket(AF_UNIX, SOCK_STREAM, 0);
+       if (fd < 0)
+               return -1;
+
+       sa.sun_family = AF_UNIX;
+       strncpy(sa.sun_path, value, sizeof(sa.sun_path));
+
+       while (loop--) {
+               /*
+                * may need to wait for qtest and ivshmem
+                * sockets are prepared by QEMU.
+                */
+               ret = connect(fd, (struct sockaddr *)&sa,
+                               sizeof(struct sockaddr_un));
+               if (ret == 0)
+                       break;
+               else
+                       usleep(100000);
+       }
+
+       if (ret != 0) {
+               close(fd);
+               return -1;
+       }
+
+       *pfd = fd;
+
+       return 0;
+}
+
+static int
+get_pci_addr_arg(const char *key __rte_unused,
+               const char *value, void *extra_args)
+{
+       struct rte_pci_addr *addr = extra_args;
+
+       if ((value == NULL) || (extra_args == NULL))
+               return -EINVAL;
+
+       if (eal_parse_pci_DomBDF(value, addr) != 0)
+               return -1;
+
+       if (addr->domain != 0)
+               return -1;
+
+       return 0;
+}
+
+static struct rte_eth_dev *
+virtio_net_eth_dev_alloc(const char *name)
+{
+       struct rte_eth_dev *eth_dev;
+       struct rte_eth_dev_data *data;
+       struct virtio_hw *hw;
+
+       eth_dev = rte_eth_dev_allocate(name, RTE_ETH_DEV_VIRTUAL);
+       if (eth_dev == NULL)
+               rte_panic("cannot alloc rte_eth_dev\n");
+
+       data = eth_dev->data;
+
+       hw = rte_zmalloc(NULL, sizeof(*hw), 0);
+       if (!hw)
+               rte_panic("malloc virtio_hw failed\n");
+
+       data->dev_private = hw;
+       eth_dev->driver = &rte_virtio_pmd;
+       return eth_dev;
+}
+
+static int
+virtio_net_eth_pmd_parse_socket_path(struct rte_kvargs *kvlist,
+               const char *option, int *socket)
+{
+       int ret;
+
+       if (rte_kvargs_count(kvlist, option) == 1) {
+               ret = rte_kvargs_process(kvlist, option,
+                               &get_socket_path_arg, socket);
+               if (ret != 0) {
+                       PMD_INIT_LOG(ERR,
+                               "Failed to connect to %s socket", option);
+                       return -1;
+               }
+       } else {
+               PMD_INIT_LOG(ERR, "No argument specified for %s", option);
+               return -1;
+       }
+
+       return 0;
+}
+
+static int
+virtio_net_eth_pmd_parse_pci_addr(struct rte_kvargs *kvlist,
+               const char *option, struct rte_pci_addr *addr,
+               struct rte_pci_addr *default_addr)
+{
+       int ret;
+
+       if (rte_kvargs_count(kvlist, option) == 1) {
+               ret = rte_kvargs_process(kvlist, option,
+                               &get_pci_addr_arg, addr);
+               if (ret != 0) {
+                       PMD_INIT_LOG(ERR,
+                               "Specified invalid address in '%s'", option);
+                       return -1;
+               }
+       } else
+               *addr = *default_addr;
+
+       return 0;
+}
+
+/*
+ * Initialization when "CONFIG_RTE_VIRTIO_VDEV_QTEST" is enabled.
+ */
+static int
+rte_qtest_virtio_pmd_init(const char *name, const char *params)
+{
+       struct rte_kvargs *kvlist = NULL;
+       struct rte_eth_dev *eth_dev = NULL;
+       int ret, qtest_sock = 0, ivshmem_sock = 0;
+       struct rte_pci_addr virtio_addr, ivshmem_addr, piix3_addr, default_addr;
+
+       if (params == NULL || params[0] == '\0')
+               goto error;
+
+       kvlist = rte_kvargs_parse(params, valid_qtest_args);
+       if (!kvlist) {
+               PMD_INIT_LOG(ERR, "error when parsing param");
+               return -EFAULT;
+       }
+
+       ret = virtio_net_eth_pmd_parse_socket_path(kvlist,
+                       ETH_VIRTIO_NET_ARG_IVSHMEM_PATH, &ivshmem_sock);
+       if (ret < 0)
+               goto error;
+
+       ret = virtio_net_eth_pmd_parse_socket_path(kvlist,
+                       ETH_VIRTIO_NET_ARG_QTEST_PATH, &qtest_sock);
+       if (ret < 0)
+               goto error;
+
+       default_addr.domain = 0;
+       default_addr.bus = 0;
+       default_addr.function = 0;
+
+       default_addr.devid = 3;
+       ret = virtio_net_eth_pmd_parse_pci_addr(kvlist,
+                       ETH_VIRTIO_NET_ARG_VIRTIO_NET_ADDR,
+                       &virtio_addr, &default_addr);
+       if (ret < 0)
+               goto error;
+
+       default_addr.devid = 4;
+       ret = virtio_net_eth_pmd_parse_pci_addr(kvlist,
+                       ETH_VIRTIO_NET_ARG_IVSHMEM_ADDR,
+                       &ivshmem_addr, &default_addr);
+       if (ret < 0)
+               goto error;
+
+       default_addr.devid = 1;
+       ret = virtio_net_eth_pmd_parse_pci_addr(kvlist,
+                       ETH_VIRTIO_NET_ARG_PIIX3_ADDR,
+                       &piix3_addr, &default_addr);
+       if (ret < 0)
+               goto error;
+
+       eth_dev = virtio_net_eth_dev_alloc(name);
+       if (eth_dev == NULL)
+               goto error;
+
+       ret = qtest_vdev_init(eth_dev->data, qtest_sock, ivshmem_sock,
+                       &virtio_addr, &ivshmem_addr, &piix3_addr);
+       if (ret < 0)
+               goto error;
+
+       /* originally, this will be called in rte_eal_pci_probe() */
+       ret = eth_virtio_dev_init(eth_dev);
+       if (ret < 0)
+               goto error;
+
+       eth_dev->driver = NULL;
+       eth_dev->data->dev_flags |= RTE_ETH_DEV_DETACHABLE;
+       eth_dev->data->kdrv = RTE_KDRV_NONE;
+       eth_dev->data->drv_name = QTEST_DRV_NAME;
+
+       rte_kvargs_free(kvlist);
+       return 0;
+
+error:
+       if (qtest_sock)
+               close (qtest_sock);
+       if (ivshmem_sock)
+               close (ivshmem_sock);
+       rte_kvargs_free(kvlist);
+       return -EFAULT;
+}
+
+/*
+ * Finalization when "CONFIG_RTE_VIRTIO_VDEV_QTEST" is enabled.
+ */
+static int
+rte_qtest_virtio_pmd_uninit(const char *name)
+{
+       struct rte_eth_dev *eth_dev = NULL;
+       int ret;
+
+       if (name == NULL)
+               return -EINVAL;
+
+       /* find the ethdev entry */
+       eth_dev = rte_eth_dev_allocated(name);
+       if (eth_dev == NULL)
+               return -ENODEV;
+
+       ret = eth_virtio_dev_uninit(eth_dev);
+       if (ret != 0)
+               return -EFAULT;
+
+       qtest_vdev_uninit(eth_dev->data);
+       rte_free(eth_dev->data->dev_private);
+
+       ret = rte_eth_dev_release_port(eth_dev);
+       if (ret != 0)
+               return -EFAULT;
+
+       return 0;
+}
+
+static struct rte_driver rte_qtest_virtio_driver = {
+       .name   = QTEST_DRV_NAME,
+       .type   = PMD_VDEV,
+       .init   = rte_qtest_virtio_pmd_init,
+       .uninit = rte_qtest_virtio_pmd_uninit,
+};
+
+PMD_REGISTER_DRIVER(rte_qtest_virtio_driver);
+#endif /* RTE_VIRTIO_VDEV_QTEST */
diff --git a/drivers/net/virtio/virtio_pci.c b/drivers/net/virtio/virtio_pci.c
index e04c0db..f4a2711 100644
--- a/drivers/net/virtio/virtio_pci.c
+++ b/drivers/net/virtio/virtio_pci.c
@@ -37,10 +37,15 @@
  #include <fcntl.h>
 #endif
 
+#include "virtio_ethdev.h"
 #include "virtio_pci.h"
 #include "virtio_logs.h"
 #include "virtqueue.h"
 
+#ifdef RTE_VIRTIO_VDEV_QTEST
+#include "qtest.h"
+#endif
+
 /*
  * Following macros are derived from linux/pci_regs.h, however,
  * we can't simply include that header here, as there is no such
@@ -449,6 +454,220 @@ static const struct virtio_pci_ops modern_ops = {
 };
 
 
+#ifdef RTE_VIRTIO_VDEV_QTEST
+static inline uint8_t
+qtest_read8(struct virtio_hw *hw, uint8_t *addr)
+{
+	return qtest_read(hw, (uint64_t)addr, 'b');
+}
+
+static inline void
+qtest_write8(struct virtio_hw *hw, uint8_t val, uint8_t *addr)
+{
+	return qtest_write(hw, (uint64_t)addr, val, 'b');
+}
+
+static inline uint16_t
+qtest_read16(struct virtio_hw *hw, uint16_t *addr)
+{
+	return qtest_read(hw, (uint64_t)addr, 'w');
+}
+
+static inline void
+qtest_write16(struct virtio_hw *hw, uint16_t val, uint16_t *addr)
+{
+	return qtest_write(hw, (uint64_t)addr, val, 'w');
+}
+
+static inline uint32_t
+qtest_read32(struct virtio_hw *hw, uint32_t *addr)
+{
+	return qtest_read(hw, (uint64_t)addr, 'l');
+}
+
+static inline void
+qtest_write32(struct virtio_hw *hw, uint32_t val, uint32_t *addr)
+{
+	return qtest_write(hw, (uint64_t)addr, val, 'l');
+}
+
+static inline void
+qtest_write64_twopart(struct virtio_hw *hw,
+		uint64_t val, uint32_t *lo, uint32_t *hi)
+{
+	qtest_write32(hw, val & ((1ULL << 32) - 1), lo);
+	qtest_write32(hw, val >> 32,		     hi);
+}
+
+static void
+qtest_modern_read_dev_config(struct virtio_hw *hw, size_t offset,
+		       void *dst, int length)
+{
+	int i;
+	uint8_t *p;
+	uint8_t old_gen, new_gen;
+
+	do {
+		old_gen = qtest_read8(hw, &hw->common_cfg->config_generation);
+
+		p = dst;
+		for (i = 0;  i < length; i++)
+			*p++ = qtest_read8(hw, (uint8_t *)hw->dev_cfg + offset + i);
+
+		new_gen = qtest_read8(hw, &hw->common_cfg->config_generation);
+	} while (old_gen != new_gen);
+}
+
+static void
+qtest_modern_write_dev_config(struct virtio_hw *hw, size_t offset,
+			const void *src, int length)
+{
+	int i;
+	const uint8_t *p = src;
+
+	for (i = 0;  i < length; i++)
+		qtest_write8(hw, *p++, (uint8_t *)hw->dev_cfg + offset + i);
+}
+
+static uint64_t
+qtest_modern_get_features(struct virtio_hw *hw)
+{
+	uint32_t features_lo, features_hi;
+
+	qtest_write32(hw, 0, &hw->common_cfg->device_feature_select);
+	features_lo = qtest_read32(hw, &hw->common_cfg->device_feature);
+
+	qtest_write32(hw, 1, &hw->common_cfg->device_feature_select);
+	features_hi = qtest_read32(hw, &hw->common_cfg->device_feature);
+
+	return ((uint64_t)features_hi << 32) | features_lo;
+}
+
+static void
+qtest_modern_set_features(struct virtio_hw *hw, uint64_t features)
+{
+	qtest_write32(hw, 0, &hw->common_cfg->guest_feature_select);
+	qtest_write32(hw, features & ((1ULL << 32) - 1),
+		&hw->common_cfg->guest_feature);
+
+	qtest_write32(hw, 1, &hw->common_cfg->guest_feature_select);
+	qtest_write32(hw, features >> 32,
+		&hw->common_cfg->guest_feature);
+}
+
+static uint8_t
+qtest_modern_get_status(struct virtio_hw *hw)
+{
+	return qtest_read8(hw, &hw->common_cfg->device_status);
+}
+
+static void
+qtest_modern_set_status(struct virtio_hw *hw, uint8_t status)
+{
+	qtest_write8(hw, status, &hw->common_cfg->device_status);
+}
+
+static void
+qtest_modern_reset(struct virtio_hw *hw)
+{
+	modern_set_status(hw, VIRTIO_CONFIG_STATUS_RESET);
+	modern_get_status(hw);
+}
+
+static uint8_t
+qtest_modern_get_isr(struct virtio_hw *hw)
+{
+	return qtest_read8(hw, hw->isr);
+}
+
+static uint16_t
+qtest_modern_set_config_irq(struct virtio_hw *hw, uint16_t vec)
+{
+	qtest_write16(hw, vec, &hw->common_cfg->msix_config);
+	return qtest_read16(hw, &hw->common_cfg->msix_config);
+}
+
+static uint16_t
+qtest_modern_get_queue_num(struct virtio_hw *hw, uint16_t queue_id)
+{
+	qtest_write16(hw, queue_id, &hw->common_cfg->queue_select);
+	return qtest_read16(hw, &hw->common_cfg->queue_size);
+}
+
+static void
+qtest_modern_setup_queue(struct virtio_hw *hw, struct virtqueue *vq)
+{
+	uint64_t desc_addr, avail_addr, used_addr;
+	uint16_t notify_off;
+
+	desc_addr = (uint64_t)vq->mz->addr;
+	avail_addr = desc_addr + vq->vq_nentries * sizeof(struct vring_desc);
+	used_addr = RTE_ALIGN_CEIL(avail_addr + offsetof(struct vring_avail,
+							 ring[vq->vq_nentries]),
+				   VIRTIO_PCI_VRING_ALIGN);
+
+	qtest_write16(hw, vq->vq_queue_index, &hw->common_cfg->queue_select);
+
+	qtest_write64_twopart(hw, desc_addr, &hw->common_cfg->queue_desc_lo,
+				      &hw->common_cfg->queue_desc_hi);
+	qtest_write64_twopart(hw, avail_addr, &hw->common_cfg->queue_avail_lo,
+				       &hw->common_cfg->queue_avail_hi);
+	qtest_write64_twopart(hw, used_addr, &hw->common_cfg->queue_used_lo,
+				      &hw->common_cfg->queue_used_hi);
+
+	notify_off = qtest_read16(hw, &hw->common_cfg->queue_notify_off);
+	vq->notify_addr = (void *)((uint8_t *)hw->notify_base +
+				notify_off * hw->notify_off_multiplier);
+
+	qtest_write16(hw, 1, &hw->common_cfg->queue_enable);
+
+	PMD_INIT_LOG(DEBUG, "queue %u addresses:", vq->vq_queue_index);
+	PMD_INIT_LOG(DEBUG, "\t desc_addr: %" PRIx64, desc_addr);
+	PMD_INIT_LOG(DEBUG, "\t aval_addr: %" PRIx64, avail_addr);
+	PMD_INIT_LOG(DEBUG, "\t used_addr: %" PRIx64, used_addr);
+	PMD_INIT_LOG(DEBUG, "\t notify addr: %p (notify offset: %u)",
+		vq->notify_addr, notify_off);
+}
+
+static void
+qtest_modern_del_queue(struct virtio_hw *hw, struct virtqueue *vq)
+{
+	qtest_write16(hw, vq->vq_queue_index, &hw->common_cfg->queue_select);
+
+	qtest_write64_twopart(hw, 0, &hw->common_cfg->queue_desc_lo,
+				  &hw->common_cfg->queue_desc_hi);
+	qtest_write64_twopart(hw, 0, &hw->common_cfg->queue_avail_lo,
+				  &hw->common_cfg->queue_avail_hi);
+	qtest_write64_twopart(hw, 0, &hw->common_cfg->queue_used_lo,
+				  &hw->common_cfg->queue_used_hi);
+
+	qtest_write16(hw, 0, &hw->common_cfg->queue_enable);
+}
+
+static void
+qtest_modern_notify_queue(struct virtio_hw *hw __rte_unused, struct virtqueue *vq)
+{
+	qtest_write16(hw, 1, vq->notify_addr);
+}
+
+static const struct virtio_pci_ops qtest_modern_ops = {
+	.read_dev_cfg	= qtest_modern_read_dev_config,
+	.write_dev_cfg	= qtest_modern_write_dev_config,
+	.reset		= qtest_modern_reset,
+	.get_status	= qtest_modern_get_status,
+	.set_status	= qtest_modern_set_status,
+	.get_features	= qtest_modern_get_features,
+	.set_features	= qtest_modern_set_features,
+	.get_isr	= qtest_modern_get_isr,
+	.set_config_irq	= qtest_modern_set_config_irq,
+	.get_queue_num	= qtest_modern_get_queue_num,
+	.setup_queue	= qtest_modern_setup_queue,
+	.del_queue	= qtest_modern_del_queue,
+	.notify_queue	= qtest_modern_notify_queue,
+};
+#endif /* RTE_VIRTIO_VDEV_QTEST */
+
+
 void
 vtpci_read_dev_config(struct virtio_hw *hw, size_t offset,
 		      void *dst, int length)
@@ -522,12 +741,16 @@ vtpci_irq_config(struct virtio_hw *hw, uint16_t vec)
 }
 
 static void *
-get_cfg_addr(struct rte_pci_device *dev, struct virtio_pci_cap *cap)
+get_cfg_addr(struct rte_eth_dev *eth_dev,
+		struct virtio_hw *hw,
+		struct virtio_pci_cap *cap)
 {
+	struct rte_pci_device *pci_dev = eth_dev->pci_dev;
 	uint8_t  bar    = cap->bar;
 	uint32_t length = cap->length;
 	uint32_t offset = cap->offset;
-	uint8_t *base;
+	uint8_t *base = NULL;
+	uint64_t size = 0;
 
 	if (bar > 5) {
 		PMD_INIT_LOG(ERR, "invalid bar: %u", bar);
@@ -540,14 +763,27 @@ get_cfg_addr(struct rte_pci_device *dev, struct virtio_pci_cap *cap)
 		return NULL;
 	}
 
-	if (offset + length > dev->mem_resource[bar].len) {
+	if (virtio_dev_check(eth_dev, RTE_ETH_DEV_PCI, NULL, 0)) {
+		size = pci_dev->mem_resource[bar].len;
+		base = pci_dev->mem_resource[bar].addr;
+	}
+#ifdef RTE_VIRTIO_VDEV_QTEST
+	else if (virtio_dev_check(eth_dev, RTE_ETH_DEV_VIRTUAL,
+				QTEST_DRV_NAME, 0)) {
+		qtest_get_bar_size(hw, "virtio-net", bar, &size);
+		qtest_get_bar_addr(hw, "virtio-net", bar, (uint64_t **)&base);
+	}
+#else
+	RTE_SET_USED(hw);
+#endif
+
+	if (offset + length > size) {
 		PMD_INIT_LOG(ERR,
 			"invalid cap: overflows bar space: %u > %" PRIu64,
-			offset + length, dev->mem_resource[bar].len);
+			offset + length, size);
 		return NULL;
 	}
 
-	base = dev->mem_resource[bar].addr;
 	if (base == NULL) {
 		PMD_INIT_LOG(ERR, "bar %u base addr is NULL", bar);
 		return NULL;
@@ -557,25 +793,48 @@ get_cfg_addr(struct rte_pci_device *dev, struct virtio_pci_cap *cap)
 }
 
 static int
-virtio_read_caps(struct rte_pci_device *dev, struct virtio_hw *hw)
+virtio_read_pci_config(struct rte_eth_dev *eth_dev,
+			struct virtio_hw *hw,
+			void *buf, size_t len, off_t offset)
 {
+	struct rte_pci_device *pci_dev = eth_dev->pci_dev;
+	int ret = -1;
+
+	if (virtio_dev_check(eth_dev, RTE_ETH_DEV_PCI, NULL, 0))
+		ret = rte_eal_pci_read_config(pci_dev, buf, len, offset);
+#ifdef RTE_VIRTIO_VDEV_QTEST
+	else if (virtio_dev_check(eth_dev, RTE_ETH_DEV_VIRTUAL,
+				QTEST_DRV_NAME, 0))
+		ret = qtest_read_pci_cfg(hw, "virtio-net", buf, len, offset);
+#else
+	RTE_SET_USED(hw);
+#endif
+
+	return ret;
+}
+
+static int
+virtio_read_caps(struct rte_eth_dev *eth_dev, struct virtio_hw *hw)
+{
+	struct rte_pci_device *pci_dev = eth_dev->pci_dev;
 	uint8_t pos;
 	struct virtio_pci_cap cap;
 	int ret;
 
-	if (rte_eal_pci_map_device(dev)) {
+	if ((eth_dev->dev_type == RTE_ETH_DEV_PCI) &&
+			(rte_eal_pci_map_device(pci_dev) < 0)) {
 		PMD_INIT_LOG(DEBUG, "failed to map pci device!");
 		return -1;
 	}
 
-	ret = rte_eal_pci_read_config(dev, &pos, 1, PCI_CAPABILITY_LIST);
+	ret = virtio_read_pci_config(eth_dev, hw, &pos, 1, PCI_CAPABILITY_LIST);
 	if (ret < 0) {
 		PMD_INIT_LOG(DEBUG, "failed to read pci capability list");
 		return -1;
 	}
 
 	while (pos) {
-		ret = rte_eal_pci_read_config(dev, &cap, sizeof(cap), pos);
+		ret = virtio_read_pci_config(eth_dev, hw, &cap, sizeof(cap), pos);
 		if (ret < 0) {
 			PMD_INIT_LOG(ERR,
 				"failed to read pci cap at pos: %x", pos);
@@ -595,18 +854,19 @@ virtio_read_caps(struct rte_pci_device *dev, struct virtio_hw *hw)
 
 		switch (cap.cfg_type) {
 		case VIRTIO_PCI_CAP_COMMON_CFG:
-			hw->common_cfg = get_cfg_addr(dev, &cap);
+			hw->common_cfg = get_cfg_addr(eth_dev, hw, &cap);
 			break;
 		case VIRTIO_PCI_CAP_NOTIFY_CFG:
-			rte_eal_pci_read_config(dev, &hw->notify_off_multiplier,
+			virtio_read_pci_config(eth_dev, hw,
+						&hw->notify_off_multiplier,
 						4, pos + sizeof(cap));
-			hw->notify_base = get_cfg_addr(dev, &cap);
+			hw->notify_base = get_cfg_addr(eth_dev, hw, &cap);
 			break;
 		case VIRTIO_PCI_CAP_DEVICE_CFG:
-			hw->dev_cfg = get_cfg_addr(dev, &cap);
+			hw->dev_cfg = get_cfg_addr(eth_dev, hw, &cap);
 			break;
 		case VIRTIO_PCI_CAP_ISR_CFG:
-			hw->isr = get_cfg_addr(dev, &cap);
+			hw->isr = get_cfg_addr(eth_dev, hw, &cap);
 			break;
 		}
 
@@ -631,31 +891,77 @@ next:
 	return 0;
 }
 
+static int
+vtpci_modern_init(struct rte_eth_dev *eth_dev, struct virtio_hw *hw)
+{
+	struct rte_pci_device *pci_dev = eth_dev->pci_dev;
+
+	PMD_INIT_LOG(INFO, "modern virtio pci detected.");
+
+	if (virtio_dev_check(eth_dev, RTE_ETH_DEV_PCI, NULL, 0)) {
+		hw->vtpci_ops = &modern_ops;
+		pci_dev->driver->drv_flags |= RTE_PCI_DRV_INTR_LSC;
+	}
+#ifdef RTE_VIRTIO_VDEV_QTEST
+	else if (virtio_dev_check(eth_dev, RTE_ETH_DEV_VIRTUAL, NULL, 0)) {
+		hw->vtpci_ops = &qtest_modern_ops;
+		eth_dev->data->dev_flags |= RTE_ETH_DEV_INTR_LSC;
+	}
+#endif
+
+	hw->modern = 1;
+
+	return 0;
+}
+
+static int
+vtpci_legacy_init(struct rte_eth_dev *eth_dev, struct virtio_hw *hw)
+{
+	struct rte_pci_device *pci_dev = eth_dev->pci_dev;
+	struct virtio_pci_cap cap;
+
+	PMD_INIT_LOG(INFO, "trying with legacy virtio pci.");
+	if (virtio_dev_check(eth_dev, RTE_ETH_DEV_PCI, NULL, 0)) {
+		if (legacy_virtio_resource_init(pci_dev, hw) < 0)
+			return -1;
+
+		hw->vtpci_ops = &legacy_ops;
+		hw->use_msix = legacy_virtio_has_msix(&pci_dev->addr);
+	}
+#ifdef RTE_VIRTIO_VDEV_QTEST
+	else if (virtio_dev_check(eth_dev, RTE_ETH_DEV_VIRTUAL,
+				QTEST_DRV_NAME, 0)) {
+		PMD_INIT_LOG(ERR, "Legacy virtio device isn't supported.");
+		return -1;
+	}
+#endif
+
+	cap.bar = cap.length = cap.offset = 0;
+	hw->modern = 0;
+
+	return 0;
+}
+
 int
-vtpci_init(struct rte_pci_device *dev, struct virtio_hw *hw)
+vtpci_init(struct rte_eth_dev *eth_dev, struct virtio_hw *hw)
 {
-	hw->dev = dev;
+	struct rte_pci_device *pci_dev = eth_dev->pci_dev;
+	int ret;
+
+	hw->dev = pci_dev;
 
 	/*
 	 * Try if we can succeed reading virtio pci caps, which exists
 	 * only on modern pci device. If failed, we fallback to legacy
 	 * virtio handling.
 	 */
-	if (virtio_read_caps(dev, hw) == 0) {
-		PMD_INIT_LOG(INFO, "modern virtio pci detected.");
-		hw->vtpci_ops = &modern_ops;
-		hw->modern    = 1;
-		dev->driver->drv_flags |= RTE_PCI_DRV_INTR_LSC;
-		return 0;
-	}
+	if (virtio_read_caps(eth_dev, hw) == 0)
+		ret = vtpci_modern_init(eth_dev, hw);
+	else
+		ret = vtpci_legacy_init(eth_dev, hw);
 
-	PMD_INIT_LOG(INFO, "trying with legacy virtio pci.");
-	if (legacy_virtio_resource_init(dev, hw) < 0)
+	if (ret < 0)
 		return -1;
 
-	hw->vtpci_ops = &legacy_ops;
-	hw->use_msix = legacy_virtio_has_msix(&dev->addr);
-	hw->modern   = 0;
-
 	return 0;
 }
diff --git a/drivers/net/virtio/virtio_pci.h b/drivers/net/virtio/virtio_pci.h
index ae6777d..41268a7 100644
--- a/drivers/net/virtio/virtio_pci.h
+++ b/drivers/net/virtio/virtio_pci.h
@@ -242,6 +242,9 @@ struct virtio_net_config;
 
 struct virtio_hw {
 	struct virtqueue *cvq;
+#ifdef RTE_VIRTIO_VDEV_QTEST
+	void        *qsession;
+#endif
 	struct rte_pci_ioport io;
 	uint64_t    guest_features;
 	uint32_t    max_tx_queues;
@@ -306,7 +309,7 @@ vtpci_with_feature(struct virtio_hw *hw, uint64_t bit)
 /*
  * Function declaration from virtio_pci.c
  */
-int vtpci_init(struct rte_pci_device *, struct virtio_hw *);
+int vtpci_init(struct rte_eth_dev *, struct virtio_hw *);
 void vtpci_reset(struct virtio_hw *);
 
 void vtpci_reinit_complete(struct virtio_hw *);
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 120+ messages in thread

* [PATCH v2 5/5] docs: add release note for qtest virtio container support
  2016-01-21 11:07   ` [RFC PATCH 5/5] virtio: Extend virtio-net PMD to support container environment Tetsuya Mukawa
                       ` (11 preceding siblings ...)
  2016-02-10  3:40     ` [PATCH v2 4/5] virtio: Add support for qtest virtio-net PMD Tetsuya Mukawa
@ 2016-02-10  3:40     ` Tetsuya Mukawa
  12 siblings, 0 replies; 120+ messages in thread
From: Tetsuya Mukawa @ 2016-02-10  3:40 UTC (permalink / raw)
  To: dev

Signed-off-by: Tetsuya Mukawa <mukawa@igel.co.jp>
---
 doc/guides/rel_notes/release_2_3.rst | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/doc/guides/rel_notes/release_2_3.rst b/doc/guides/rel_notes/release_2_3.rst
index 1e7d51d..ab3baf9 100644
--- a/doc/guides/rel_notes/release_2_3.rst
+++ b/doc/guides/rel_notes/release_2_3.rst
@@ -43,6 +43,9 @@ This section should contain new features added in this release. Sample format:
 
   Add a new virtual device, named eth_cvio, to support virtio for containers.
 
+* **Virtio support for containers using QEMU qtest mode.**
+  Add a new virtual device, named eth_qtest_virtio, to support virtio for containers
+  using QEMU qtest mode.
 
 Resolved Issues
 ---------------
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 120+ messages in thread

* Re: [PATCH v2 2/5] EAL: Add new EAL "--qtest-virtio" option
  2016-02-10  3:40     ` [PATCH v2 2/5] EAL: Add new EAL "--qtest-virtio" option Tetsuya Mukawa
@ 2016-02-15  7:52       ` Tan, Jianfeng
  2016-02-16  1:32         ` Tetsuya Mukawa
  2016-02-16  5:53       ` David Marchand
                         ` (7 subsequent siblings)
  8 siblings, 1 reply; 120+ messages in thread
From: Tan, Jianfeng @ 2016-02-15  7:52 UTC (permalink / raw)
  To: Tetsuya Mukawa, dev

Hi Tetsuya,

On 2/10/2016 11:40 AM, Tetsuya Mukawa wrote:
> To work with qtest virtio-net PMD, virtual address that maps hugepages
> should be between (1 << 31) to (1 << 44). This patch adds one more option

Is there any reference about this limitation? And is it also true for 32 
bit machine?

Thanks,
Jianfeng

^ permalink raw reply	[flat|nested] 120+ messages in thread

* Re: [PATCH v2 2/5] EAL: Add new EAL "--qtest-virtio" option
  2016-02-15  7:52       ` Tan, Jianfeng
@ 2016-02-16  1:32         ` Tetsuya Mukawa
  0 siblings, 0 replies; 120+ messages in thread
From: Tetsuya Mukawa @ 2016-02-16  1:32 UTC (permalink / raw)
  To: Tan, Jianfeng, dev

On 2016/02/15 16:52, Tan, Jianfeng wrote:
> Hi Tetsuya,
>
> On 2/10/2016 11:40 AM, Tetsuya Mukawa wrote:
>> To work with qtest virtio-net PMD, virtual address that maps hugepages
>> should be between (1 << 31) to (1 << 44). This patch adds one more
>> option
>
> Is there any reference about this limitation? And is it also true for
> 32 bit machine?
>

Hi Jianfeng,

44bit limitation is come from virtio legacy device spec.
The queue address register of virtio device is 32bit width.
And we should set page number to this register.
As a result, EAL memory should be under 44 bits.

I only support virtio modern device with this patch series.
So we can relax this limitation a bit.
(Next limitation may be 47 bits. It seems it is come from QEMU
implementation.)
But I guess 44bit limitation is still not so hard, also we can leave a
possibility to support legacy device.

31bits limitation is come from current memory mapping of QTest QEMU guest.
Here is.

 * ------------------------------------------------------------
 * Memory mapping of qtest quest
 * ------------------------------------------------------------
 * 0x00000000_00000000 - 0x00000000_3fffffff : not used
 * 0x00000000_40000000 - 0x00000000_40000fff : virtio-net(BAR1)
 * 0x00000000_40001000 - 0x00000000_40ffffff : not used
 * 0x00000000_41000000 - 0x00000000_417fffff : virtio-net(BAR4)
 * 0x00000000_41800000 - 0x00000000_41ffffff : not used
 * 0x00000000_42000000 - 0x00000000_420000ff : ivshmem(BAR0)
 * 0x00000000_42000100 - 0x00000000_42ffffff : not used
 * 0x00000000_80000000 - 0xffffffff_ffffffff : ivshmem(BAR2)

Thanks,
Tetsuya


> Thanks,
> Jianfeng

^ permalink raw reply	[flat|nested] 120+ messages in thread

* Re: [PATCH v2 2/5] EAL: Add new EAL "--qtest-virtio" option
  2016-02-10  3:40     ` [PATCH v2 2/5] EAL: Add new EAL "--qtest-virtio" option Tetsuya Mukawa
  2016-02-15  7:52       ` Tan, Jianfeng
@ 2016-02-16  5:53       ` David Marchand
  2016-02-16 11:36         ` Tan, Jianfeng
  2016-02-22  8:17       ` [PATCH v3 0/6] Virtio-net PMD: QEMU QTest extension for container Tetsuya Mukawa
                         ` (6 subsequent siblings)
  8 siblings, 1 reply; 120+ messages in thread
From: David Marchand @ 2016-02-16  5:53 UTC (permalink / raw)
  To: Tetsuya Mukawa; +Cc: dev

On Wed, Feb 10, 2016 at 4:40 AM, Tetsuya Mukawa <mukawa@igel.co.jp> wrote:
> To work with qtest virtio-net PMD, virtual address that maps hugepages
> should be between (1 << 31) to (1 << 44). This patch adds one more option
> to map like this. Also all hugepages should consists of one file.
> Because of this, the option will work only when '--single-file' option is
> specified.

This patch is pure virtio stuff.
Please, rework this so that we have a generic api in eal (asking for a
free region could be of use for something else).
Then you can call this api from virtio pmd.

If you need to pass options to virtio pmd, add some devargs for it.


Thanks.

-- 
David Marchand

^ permalink raw reply	[flat|nested] 120+ messages in thread

* Re: [PATCH v2 2/5] EAL: Add new EAL "--qtest-virtio" option
  2016-02-16  5:53       ` David Marchand
@ 2016-02-16 11:36         ` Tan, Jianfeng
  2016-02-17  3:36           ` Tetsuya Mukawa
  0 siblings, 1 reply; 120+ messages in thread
From: Tan, Jianfeng @ 2016-02-16 11:36 UTC (permalink / raw)
  To: David Marchand, Tetsuya Mukawa; +Cc: dev

Hi David,

On 2/16/2016 1:53 PM, David Marchand wrote:
> On Wed, Feb 10, 2016 at 4:40 AM, Tetsuya Mukawa <mukawa@igel.co.jp> wrote:
>> To work with qtest virtio-net PMD, virtual address that maps hugepages
>> should be between (1 << 31) to (1 << 44). This patch adds one more option
>> to map like this. Also all hugepages should consists of one file.
>> Because of this, the option will work only when '--single-file' option is
>> specified.
> This patch is pure virtio stuff.
> Please, rework this so that we have a generic api in eal (asking for a
> free region could be of use for something else).
> Then you can call this api from virtio pmd.
>
> If you need to pass options to virtio pmd, add some devargs for it.
>

Seems it's hard to slip this option into --vdev="eth_qtest_virtio0..." 
from my side because memory initialization happens before vdev option is 
parsed.

Can we make use of "--base-virtaddr" achieve the function of this option?

Thanks,
Jianfeng

^ permalink raw reply	[flat|nested] 120+ messages in thread

* Re: [PATCH v2 2/5] EAL: Add new EAL "--qtest-virtio" option
  2016-02-16 11:36         ` Tan, Jianfeng
@ 2016-02-17  3:36           ` Tetsuya Mukawa
  0 siblings, 0 replies; 120+ messages in thread
From: Tetsuya Mukawa @ 2016-02-17  3:36 UTC (permalink / raw)
  To: Tan, Jianfeng, David Marchand; +Cc: dev

On 2016/02/16 20:36, Tan, Jianfeng wrote:
> Hi David,
>
> On 2/16/2016 1:53 PM, David Marchand wrote:
>> On Wed, Feb 10, 2016 at 4:40 AM, Tetsuya Mukawa <mukawa@igel.co.jp>
>> wrote:
>>> To work with qtest virtio-net PMD, virtual address that maps hugepages
>>> should be between (1 << 31) to (1 << 44). This patch adds one more
>>> option
>>> to map like this. Also all hugepages should consists of one file.
>>> Because of this, the option will work only when '--single-file'
>>> option is
>>> specified.
>> This patch is pure virtio stuff.
>> Please, rework this so that we have a generic api in eal (asking for a
>> free region could be of use for something else).
>> Then you can call this api from virtio pmd.
>>
>> If you need to pass options to virtio pmd, add some devargs for it.
>>
>
> Seems it's hard to slip this option into --vdev="eth_qtest_virtio0..."
> from my side because memory initialization happens before vdev option
> is parsed.
>
> Can we make use of "--base-virtaddr" achieve the function of this option?

I think same thing also.

Option1 is just using "--base-virtaddr" option without any fixes.

Option2 is adding "--range-virtaddr" option.
When "--range-option" is set, EAL will find free region in specified
memory region, then set the address to 'base_vrtiaddr' variable.
This will be done before base_addr variable is used by current
implementation.
How about this?

Here is rough implementation.

diff --git a/lib/librte_eal/common/eal_common_options.c
b/lib/librte_eal/common/eal_common_options.c
index 65bccbd..996b61d 100644
--- a/lib/librte_eal/common/eal_common_options.c
+++ b/lib/librte_eal/common/eal_common_options.c
@@ -74,6 +74,8 @@ eal_short_options[] =
 const struct option
 eal_long_options[] = {
        {OPT_BASE_VIRTADDR,     1, NULL, OPT_BASE_VIRTADDR_NUM    },
+       {OPT_RANGE_VIRTADDR,    1, NULL, OPT_RANGE_VIRTADDR_NUM   },
+       {OPT_ALIGN_MEMSIZE,     0, NULL, OPT_ALIGN_MEMSIZE_NUM    },
        {OPT_CREATE_UIO_DEV,    0, NULL, OPT_CREATE_UIO_DEV_NUM   },
        {OPT_FILE_PREFIX,       1, NULL, OPT_FILE_PREFIX_NUM      },
        {OPT_HELP,              0, NULL, OPT_HELP_NUM             },
@@ -137,6 +139,9 @@ eal_reset_internal_config(struct internal_config
*internal_cfg)
        for (i = 0; i < MAX_HUGEPAGE_SIZES; i++)
                internal_cfg->hugepage_info[i].lock_descriptor = -1;
        internal_cfg->base_virtaddr = 0;
+       internal_cfg->range_virtaddr_start = 0;
+       internal_cfg->range_virtaddr_end = 0;
+       internal_cfg->align_memsize = 0;

        internal_cfg->syslog_facility = LOG_DAEMON;
        /* default value from build option */
@@ -985,6 +990,18 @@ eal_check_common_options(struct internal_config
*internal_cfg)
                return -1;
        }

+       if (internal_cfg->base_virtaddr &&
internal_cfg->range_virtaddr_end) {
+               RTE_LOG(ERR, EAL, "Option --"OPT_RANGE_VIRTADDR" cannot "
+                       "be specified together with
--"OPT_BASE_VIRTADDR"\n");
+               return -1;
+       }
+
+       if (internal_cfg->range_virtaddr_end !=0 &&
internal_cfg->align_memsize) {
+               RTE_LOG(ERR, EAL, "Option --"OPT_RANGE_VIRTADDR" should be "
+                       "specified together with --"OPT_ALIGN_MEMSIZE"\n");
+               return -1;
+       }
+
        return 0;
 }

diff --git a/lib/librte_eal/common/eal_internal_cfg.h
b/lib/librte_eal/common/eal_internal_cfg.h
index 9117ed9..df33a9f 100644
--- a/lib/librte_eal/common/eal_internal_cfg.h
+++ b/lib/librte_eal/common/eal_internal_cfg.h
@@ -78,6 +78,9 @@ struct internal_config {
        volatile unsigned force_sockets;
        volatile uint64_t socket_mem[RTE_MAX_NUMA_NODES]; /**< amount of
memory per socket */
        uintptr_t base_virtaddr;          /**< base address to try and
reserve memory from */
+       uintptr_t range_virtaddr_start;   /**< start address of mappable
region */
+       uintptr_t range_virtaddr_end;     /**< end address of mappable
region */
+       volatile unsigned align_memsize;  /**< true to align virtaddr by
memory size */
        volatile int syslog_facility;     /**< facility passed to
openlog() */
        volatile uint32_t log_level;      /**< default log level */
        /** default interrupt mode for VFIO */
diff --git a/lib/librte_eal/common/eal_options.h
b/lib/librte_eal/common/eal_options.h
index e5da14a..9e36f68 100644
--- a/lib/librte_eal/common/eal_options.h
+++ b/lib/librte_eal/common/eal_options.h
@@ -47,6 +47,10 @@ enum {
        OPT_LONG_MIN_NUM = 256,
 #define OPT_BASE_VIRTADDR     "base-virtaddr"
        OPT_BASE_VIRTADDR_NUM,
+#define OPT_RANGE_VIRTADDR    "range-virtaddr"
+       OPT_RANGE_VIRTADDR_NUM,
+#define OPT_ALIGN_MEMSIZE     "align-memsize"
+       OPT_ALIGN_MEMSIZE_NUM,
 #define OPT_CREATE_UIO_DEV    "create-uio-dev"
        OPT_CREATE_UIO_DEV_NUM,
 #define OPT_FILE_PREFIX       "file-prefix"
diff --git a/lib/librte_eal/linuxapp/eal/eal.c
b/lib/librte_eal/linuxapp/eal/eal.c
index 82f34f7..e6c6b34 100644
--- a/lib/librte_eal/linuxapp/eal/eal.c
+++ b/lib/librte_eal/linuxapp/eal/eal.c
@@ -444,6 +444,38 @@ eal_parse_base_virtaddr(const char *arg)
 }

 static int
+eal_parse_range_virtaddr(const char *range)
+{
+       char *p, *endptr;
+       uint64_t tmp_start, tmp_end;
+
+       if (range[0] == '\0')
+               return -1;
+
+       p = strchr(range, '-');
+       if (p == NULL)
+               return -1;
+       *p++ = '\0';
+
+       errno = 0;
+       tmp_start = strtoul(range, &endptr, 0);
+       if ((errno != 0) || endptr == NULL || (*endptr != '\0'))
+               return -1;
+
+       tmp_end = strtoul(p, &endptr, 0);
+       if ((errno != 0) || endptr == NULL || (*endptr != '\0'))
+               return -1;
+
+       if (tmp_start >= tmp_end)
+               return -1;
+
+       internal_config.range_virtaddr_start = tmp_start;
+       internal_config.range_virtaddr_end = tmp_end;
+
+       return 0;
+}
+
+static int
 eal_parse_vfio_intr(const char *mode)
 {
        unsigned i;
@@ -604,6 +636,20 @@ eal_parse_args(int argc, char **argv)
                        }
                        break;

+               case OPT_RANGE_VIRTADDR_NUM:
+                       if (eal_parse_range_virtaddr(optarg) < 0) {
+                               RTE_LOG(ERR, EAL, "invalid parameter for --"
+                                               OPT_RANGE_VIRTADDR "\n");
+                               eal_usage(prgname);
+                               ret = -1;
+                               goto out;
+                       }
+                       break;
+
+               case OPT_ALIGN_MEMSIZE_NUM:
+                       internal_config.align_memsize = 1;
+                       break;
+
                case OPT_VFIO_INTR_NUM:
                        if (eal_parse_vfio_intr(optarg) < 0) {
                                RTE_LOG(ERR, EAL, "invalid parameters
for --"
diff --git a/lib/librte_eal/linuxapp/eal/eal_memory.c
b/lib/librte_eal/linuxapp/eal/eal_memory.c
index a6b3616..a9d30d7 100644
--- a/lib/librte_eal/linuxapp/eal/eal_memory.c
+++ b/lib/librte_eal/linuxapp/eal/eal_memory.c
@@ -251,6 +251,75 @@ aslr_enabled(void)
 }

 /*
+ * Find memory space that fits user request.
+ */
+static uintptr_t
+rte_eal_get_free_region(uint64_t pagesz)
+{
+       uint64_t alloc_size, start, end, next_start;
+       uint64_t low_limit, high_limit;
+       uintptr_t addr = 0;
+       char buf[1024], *p;
+       FILE *fp;
+
+       alloc_size = internal_config.memory;
+       low_limit = internal_config.range_virtaddr_start;
+       high_limit = internal_config.range_virtaddr_end;
+
+       /* allocation size should be aligned by page size */
+       if (alloc_size != RTE_ALIGN_CEIL(alloc_size, pagesz)) {
+               rte_panic("Invalid allocation size 0x%lx\n", alloc_size);
+               return NULL;
+       }
+
+       if (internal_config.align_memsize) {
+               /*
+                * address should be aligned by allocation size because
+                * BAR register of PCI device requiers such an address
+                */
+               low_limit = RTE_ALIGN_CEIL(low_limit, alloc_size);
+               high_limit = RTE_ALIGN_FLOOR(high_limit, alloc_size);
+       }
+
+       fp = fopen("/proc/self/maps", "r");
+       if (fp == NULL) {
+               rte_panic("Cannot open /proc/self/maps\n");
+               return NULL;
+       }
+
+       next_start = 0;
+       do {
+               start = next_start;
+
+               if ((p = fgets(buf, sizeof(buf), fp)) != NULL) {
+                       if (sscanf(p, "%lx-%lx ", &end, &next_start) < 2)
+                               break;
+
+                       next_start = RTE_ALIGN_CEIL(next_start, alloc_size);
+                       end = RTE_ALIGN_CEIL(end, alloc_size) - 1;
+               } else
+                       end = UINT64_MAX;
+
+               if (start >= high_limit)
+                       break;
+               if (end < low_limit)
+                       continue;
+
+               start = RTE_MAX(start, low_limit);
+               end = RTE_MIN(end, high_limit - 1);
+
+               if (end - start >= alloc_size - 1) {
+                       addr = start;
+                       break;
+               }
+       } while (end != UINT64_MAX);
+
+       fclose(fp);
+
+       return addr;
+}
+
+/*
  * Try to mmap *size bytes in /dev/zero. If it is successful, return the
  * pointer to the mmap'd area and keep *size unmodified. Else, retry
  * with a smaller zone: decrease *size by hugepage_sz until it reaches
@@ -1126,6 +1195,25 @@ rte_eal_hugepage_init(void)
        /* get pointer to global configuration */
        mcfg = rte_eal_get_configuration()->mem_config;

+       if (internal_config.range_virtaddr_end) {
+               uint64_t pagesize = RTE_PGSIZE_4K;
+               struct hugepage_info *hpi;
+               unsigned n;
+               uintptr_t addr;
+
+               /* determine maximum hugepage size */
+               for (n = 0; n < internal_config.num_hugepage_sizes; n++) {
+                       hpi = &internal_config.hugepage_info[n];
+                       pagesize = RTE_MAX(hpi->hugepage_sz, pagesize);
+               }
+
+               addr = rte_eal_get_free_region(pagesize);
+               if (addr == 0)
+                       RTE_LOG(WARNING, EAL,
+                               "no free space to mmap in specified
region\n");
+               internal_config.base_virtaddr = addr;
+       }
+
        /* when hugetlbfs is disabled or single-file option is specified */
        if (internal_config.no_hugetlbfs || internal_config.single_file) {
                int fd;
@@ -1158,7 +1246,8 @@ rte_eal_hugepage_init(void)
                        return -1;
                }

-               addr = mmap(NULL, internal_config.memory,
+               addr = mmap((void *)internal_config.base_virtaddr,
+                           internal_config.memory,
                            PROT_READ | PROT_WRITE,
                            MAP_SHARED | MAP_POPULATE, fd, 0);
                if (addr == MAP_FAILED) {

 

^ permalink raw reply related	[flat|nested] 120+ messages in thread

* [PATCH v3 0/6] Virtio-net PMD: QEMU QTest extension for container
  2016-02-10  3:40     ` [PATCH v2 2/5] EAL: Add new EAL "--qtest-virtio" option Tetsuya Mukawa
  2016-02-15  7:52       ` Tan, Jianfeng
  2016-02-16  5:53       ` David Marchand
@ 2016-02-22  8:17       ` Tetsuya Mukawa
  2016-02-22  8:17       ` [PATCH v3 1/6] virtio: Retrieve driver name from eth_dev Tetsuya Mukawa
                         ` (5 subsequent siblings)
  8 siblings, 0 replies; 120+ messages in thread
From: Tetsuya Mukawa @ 2016-02-22  8:17 UTC (permalink / raw)
  To: dev

The patches will work on below patch series.
 - [PATCH v2 0/5] virtio support for container

[Changes]
v3 changes:
 - Rebase on latest master.
 - remove "-qtest-virtio" option, then add "--range-virtaddr" and
   "--align-memsize" options.
 - Fix typos in qtest.c

v2 changes:
 - Rebase on above patch seiries.
 - Rebase on master
 - Add "--qtest-virtio" EAL option.
 - Fixes in qtest.c
  - Fix error handling for the case qtest connection is closed.
  - Use eventfd for interrupt messaging.
  - Use linux header for PCI register definitions.
  - Fix qtest_raw_send/recv to handle error correctly.
  - Fix bit mask of PCI_CONFIG_ADDR.
  - Describe memory and ioport usage of qtest guest in qtest.c
  - Remove loop that is for finding PCI devices.


[Abstraction]

Normally, virtio-net PMD only works on VM, because there is no virtio-net device on host.
This patches extend  virtio-net PMD to be able to work on host as virtual PMD.
But we didn't implement virtio-net device as a part of virtio-net PMD.
To prepare virtio-net device for the PMD, start QEMU process with special QTest mode, then connect it from virtio-net PMD through unix domain socket.

The PMD can connect to anywhere QEMU virtio-net device can.
For example, the PMD can connects to vhost-net kernel module and vhost-user backend application.
Similar to virtio-net PMD on QEMU, application memory that uses virtio-net PMD will be shared between vhost backend application.
But vhost backend application memory will not be shared.

Main target of this PMD is container like docker, rkt, lxc and etc.
We can isolate related processes(virtio-net PMD process, QEMU and vhost-user backend process) by container.
But, to communicate through unix domain socket, shared directory will be needed.


[How to use]

 Please use QEMU-2.5.1, or above.
 (So far, QEMU-2.5.1 hasn't been released yet, so please checkout master from QEMU repository)

 - Compile
 Set "CONFIG_RTE_VIRTIO_VDEV_QTEST=y" in config/common_linux.
 Then compile it.

 - Start QEMU like below.
 $ qemu-system-x86_64 \
              -machine pc-i440fx-1.4,accel=qtest \
              -display none -qtest-log /dev/null \
              -qtest unix:/tmp/socket,server \
              -netdev type=tap,script=/etc/qemu-ifup,id=net0,queues=1 \
              -device virtio-net-pci,netdev=net0,mq=on,disable-modern=false,addr=3 \
              -chardev socket,id=chr1,path=/tmp/ivshmem,server \
              -device ivshmem,size=1G,chardev=chr1,vectors=1,addr=4

 - Start DPDK application like below
 $ testpmd -c f -n 1 -m 1024 --no-pci --single-file --qtest-virtio \
             --vdev="eth_qtest_virtio0,qtest=/tmp/socket,ivshmem=/tmp/ivshmem"\
             -- --disable-hw-vlan --txqflags=0xf00 -i

(*1) Please Specify same memory size in QEMU and DPDK command line.
(*2) Should use qemu-2.5.1, or above.
(*3) QEMU process is needed per port.
(*4) virtio-1.0 device are only supported.
(*5) The vhost backends like vhost-net and vhost-user can be specified.
(*6) In most cases, just using above command is enough, but you can also
     specify other QEMU virtio-net options.
(*7) Only checked "pc-i440fx-1.4" machine, but may work with other
     machines. It depends on a machine has piix3 south bridge.
     If the machine doesn't have, virtio-net PMD cannot receive status
     changed interrupts.
(*8) Should not add "--enable-kvm" to QEMU command line.


[Detailed Description]

 - virtio-net device implementation
The PMD uses QEMU virtio-net device. To do that, QEMU QTest functionality is used.
QTest is a test framework of QEMU devices. It allows us to implement a device driver outside of QEMU.
With QTest, we can implement DPDK application and virtio-net PMD as standalone process on host.
When QEMU is invoked as QTest mode, any guest code will not run.
To know more about QTest, see below.
http://wiki.qemu.org/Features/QTest

 - probing devices
QTest provides a unix domain socket. Through this socket, driver process can access to I/O port and memory of QEMU virtual machine.
The PMD will send I/O port accesses to probe pci devices.
If we can find virtio-net and ivshmem device, initialize the devices.
Also, I/O port accesses of virtio-net PMD will be sent through socket, and virtio-net PMD can initialize vitio-net device on QEMU correctly.

 - ivshmem device to share memory
To share memory that virtio-net PMD process uses, ivshmem device will be used.
Because ivshmem device can only handle one file descriptor, shared memory should be consist of one file.
To allocate such a memory, EAL has new option called "--single-file".
Also, the hugepages should be mapped between "1 << 31" to "1 << 44".
To map like above, EAL has one more new option called "-qtest-virtio".
While initializing ivshmem device, we can set BAR(Base Address Register).
It represents which memory QEMU vcpu can access to this shared memory.
We will specify host virtual address of shared memory as this address.
It is very useful because we don't need to apply patch to QEMU to calculate address offset.
(For example, if virtio-net PMD process will allocate memory from shared memory, then specify the virtual address of it to virtio-net register, QEMU virtio-net device can understand it without calculating address offset.)


Tetsuya Mukawa (6):
  virtio: Retrieve driver name from eth_dev
  vhost: Add a function to check virtio device type
  EAL: Add new EAL "--range-virtaddr" option
  EAL: Add a new "--align-memsize" option
  virtio: Add support for qtest virtio-net PMD
  docs: add release note for qtest virtio container support

 config/common_linuxapp                     |    1 +
 doc/guides/rel_notes/release_16_04.rst     |    3 +
 drivers/net/virtio/Makefile                |    4 +
 drivers/net/virtio/qtest.c                 | 1342 ++++++++++++++++++++++++++++
 drivers/net/virtio/qtest.h                 |   65 ++
 drivers/net/virtio/virtio_ethdev.c         |  433 ++++++++-
 drivers/net/virtio/virtio_ethdev.h         |   32 +
 drivers/net/virtio/virtio_pci.c            |  364 +++++++-
 drivers/net/virtio/virtio_pci.h            |    5 +-
 lib/librte_eal/common/eal_common_options.c |   17 +
 lib/librte_eal/common/eal_internal_cfg.h   |    3 +
 lib/librte_eal/common/eal_options.h        |    4 +
 lib/librte_eal/linuxapp/eal/eal.c          |   43 +
 lib/librte_eal/linuxapp/eal/eal_memory.c   |   91 +-
 14 files changed, 2338 insertions(+), 69 deletions(-)
 create mode 100644 drivers/net/virtio/qtest.c
 create mode 100644 drivers/net/virtio/qtest.h

-- 
2.1.4

^ permalink raw reply	[flat|nested] 120+ messages in thread

* [PATCH v3 1/6] virtio: Retrieve driver name from eth_dev
  2016-02-10  3:40     ` [PATCH v2 2/5] EAL: Add new EAL "--qtest-virtio" option Tetsuya Mukawa
                         ` (2 preceding siblings ...)
  2016-02-22  8:17       ` [PATCH v3 0/6] Virtio-net PMD: QEMU QTest extension for container Tetsuya Mukawa
@ 2016-02-22  8:17       ` Tetsuya Mukawa
  2016-02-22  8:17       ` [PATCH v3 2/6] vhost: Add a function to check virtio device type Tetsuya Mukawa
                         ` (4 subsequent siblings)
  8 siblings, 0 replies; 120+ messages in thread
From: Tetsuya Mukawa @ 2016-02-22  8:17 UTC (permalink / raw)
  To: dev

Currently, virtio_dev_info_get() retrieves driver name from pci_drv.
If the driver is virtual PMD, pci_drv will be invalid.
So retrieves the name from eth_dev.

Signed-off-by: Tetsuya Mukawa <mukawa@igel.co.jp>
---
 drivers/net/virtio/virtio_ethdev.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c
index bff1926..429377b 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -1438,7 +1438,7 @@ virtio_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
 {
 	struct virtio_hw *hw = dev->data->dev_private;
 
-	dev_info->driver_name = dev->driver->pci_drv.name;
+	dev_info->driver_name = dev->data->drv_name;
 	dev_info->max_rx_queues = (uint16_t)hw->max_rx_queues;
 	dev_info->max_tx_queues = (uint16_t)hw->max_tx_queues;
 	dev_info->min_rx_bufsize = VIRTIO_MIN_RX_BUFSIZE;
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 120+ messages in thread

* [PATCH v3 2/6] vhost: Add a function to check virtio device type
  2016-02-10  3:40     ` [PATCH v2 2/5] EAL: Add new EAL "--qtest-virtio" option Tetsuya Mukawa
                         ` (3 preceding siblings ...)
  2016-02-22  8:17       ` [PATCH v3 1/6] virtio: Retrieve driver name from eth_dev Tetsuya Mukawa
@ 2016-02-22  8:17       ` Tetsuya Mukawa
  2016-02-22  8:17       ` [PATCH v3 3/6] EAL: Add new EAL "--range-virtaddr" option Tetsuya Mukawa
                         ` (3 subsequent siblings)
  8 siblings, 0 replies; 120+ messages in thread
From: Tetsuya Mukawa @ 2016-02-22  8:17 UTC (permalink / raw)
  To: dev

The patch adds below function to cleanup virtio code.
 - virtio_dev_check()

Signed-off-by: Tetsuya Mukawa <mukawa@igel.co.jp>
---
 drivers/net/virtio/virtio_ethdev.c | 52 ++++++++++++++++++--------------------
 drivers/net/virtio/virtio_ethdev.h | 32 +++++++++++++++++++++++
 2 files changed, 57 insertions(+), 27 deletions(-)

diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c
index 429377b..bc631c7 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -371,7 +371,7 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev,
 	vq->mz = mz;
 	vq->vq_ring_virt_mem = mz->addr;
 
-	if (dev->dev_type == RTE_ETH_DEV_PCI) {
+	if (virtio_dev_check(dev, RTE_ETH_DEV_PCI, NULL, 0)) {
 		vq->vq_ring_mem = mz->phys_addr;
 
 		/* Virtio PCI device VIRTIO_PCI_QUEUE_PF register is 32bit,
@@ -429,7 +429,7 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev,
 		vq->virtio_net_hdr_vaddr = mz->addr;
 		memset(vq->virtio_net_hdr_vaddr, 0, hdr_size);
 
-		if (dev->dev_type == RTE_ETH_DEV_PCI)
+		if (virtio_dev_check(dev, RTE_ETH_DEV_PCI, NULL, 0))
 			vq->virtio_net_hdr_mem = mz->phys_addr;
 #ifdef RTE_VIRTIO_VDEV
 		else
@@ -439,7 +439,7 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev,
 
 	hw->vtpci_ops->setup_queue(hw, vq);
 
-	if (dev->dev_type == RTE_ETH_DEV_PCI)
+	if (virtio_dev_check(dev, RTE_ETH_DEV_PCI, NULL, 0))
 		vq->offset = offsetof(struct rte_mbuf, buf_physaddr);
 #ifdef RTE_VIRTIO_VDEV
 	else
@@ -490,15 +490,13 @@ static void
 virtio_dev_close(struct rte_eth_dev *dev)
 {
 	struct virtio_hw *hw = dev->data->dev_private;
-	struct rte_pci_device *pci_dev = dev->pci_dev;
 
 	PMD_INIT_LOG(DEBUG, "virtio_dev_close");
 
 	/* reset the NIC */
-	if (dev->dev_type == RTE_ETH_DEV_PCI) {
-		if (pci_dev->driver->drv_flags & RTE_PCI_DRV_INTR_LSC)
-			vtpci_irq_config(hw, VIRTIO_MSI_NO_VECTOR);
-	}
+	if (virtio_dev_check(dev, RTE_ETH_DEV_PCI, NULL, RTE_PCI_DRV_INTR_LSC))
+		vtpci_irq_config(hw, VIRTIO_MSI_NO_VECTOR);
+
 	vtpci_reset(hw);
 	hw->started = 0;
 	virtio_dev_free_mbufs(dev);
@@ -1001,7 +999,7 @@ virtio_interrupt_handler(__rte_unused struct rte_intr_handle *handle,
 	isr = vtpci_isr(hw);
 	PMD_DRV_LOG(INFO, "interrupt status = %#x", isr);
 
-	if (dev->dev_type == RTE_ETH_DEV_PCI)
+	if (virtio_dev_check(dev, RTE_ETH_DEV_PCI, NULL, 0))
 		if (rte_intr_enable(&dev->pci_dev->intr_handle) < 0)
 			PMD_DRV_LOG(ERR, "interrupt enable failed");
 
@@ -1056,9 +1054,10 @@ eth_virtio_dev_init(struct rte_eth_dev *eth_dev)
 
 	pci_dev = eth_dev->pci_dev;
 
-	if (eth_dev->dev_type == RTE_ETH_DEV_PCI)
+	if (virtio_dev_check(eth_dev, RTE_ETH_DEV_PCI, NULL, 0)) {
 		if (vtpci_init(pci_dev, hw) < 0)
 			return -1;
+	}
 
 	/* Reset the device although not necessary at startup */
 	vtpci_reset(hw);
@@ -1072,7 +1071,7 @@ eth_virtio_dev_init(struct rte_eth_dev *eth_dev)
 		return -1;
 
 	/* If host does not support status then disable LSC */
-	if (eth_dev->dev_type == RTE_ETH_DEV_PCI) {
+	if (virtio_dev_check(eth_dev, RTE_ETH_DEV_PCI, NULL, 0)) {
 		if (!vtpci_with_feature(hw, VIRTIO_NET_F_STATUS))
 			pci_dev->driver->drv_flags &= ~RTE_PCI_DRV_INTR_LSC;
 
@@ -1154,13 +1153,14 @@ eth_virtio_dev_init(struct rte_eth_dev *eth_dev)
 
 	PMD_INIT_LOG(DEBUG, "hw->max_rx_queues=%d   hw->max_tx_queues=%d",
 			hw->max_rx_queues, hw->max_tx_queues);
-	if (eth_dev->dev_type == RTE_ETH_DEV_PCI) {
+	if (virtio_dev_check(eth_dev, RTE_ETH_DEV_PCI, NULL, 0)) {
 		PMD_INIT_LOG(DEBUG, "port %d vendorID=0x%x deviceID=0x%x",
 			     eth_dev->data->port_id, pci_dev->id.vendor_id,
 			     pci_dev->id.device_id);
 
 		/* Setup interrupt callback  */
-		if (pci_dev->driver->drv_flags & RTE_PCI_DRV_INTR_LSC)
+		if (virtio_dev_check(eth_dev, RTE_ETH_DEV_PCI,
+					NULL, RTE_PCI_DRV_INTR_LSC))
 			rte_intr_callback_register(&pci_dev->intr_handle,
 						   virtio_interrupt_handler,
 						   eth_dev);
@@ -1197,11 +1197,11 @@ eth_virtio_dev_uninit(struct rte_eth_dev *eth_dev)
 	eth_dev->data->mac_addrs = NULL;
 
 	/* reset interrupt callback  */
-	if (eth_dev->dev_type == RTE_ETH_DEV_PCI)
-		if (pci_dev->driver->drv_flags & RTE_PCI_DRV_INTR_LSC)
-			rte_intr_callback_unregister(&pci_dev->intr_handle,
-						     virtio_interrupt_handler,
-						     eth_dev);
+	if (virtio_dev_check(eth_dev, RTE_ETH_DEV_PCI, NULL, RTE_PCI_DRV_INTR_LSC))
+		rte_intr_callback_unregister(&pci_dev->intr_handle,
+					     virtio_interrupt_handler,
+					     eth_dev);
+
 	rte_eal_pci_unmap_device(pci_dev);
 
 	PMD_INIT_LOG(DEBUG, "dev_uninit completed");
@@ -1248,7 +1248,6 @@ virtio_dev_configure(struct rte_eth_dev *dev)
 {
 	const struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode;
 	struct virtio_hw *hw = dev->data->dev_private;
-	struct rte_pci_device *pci_dev = dev->pci_dev;
 
 	PMD_INIT_LOG(DEBUG, "configure");
 
@@ -1266,12 +1265,11 @@ virtio_dev_configure(struct rte_eth_dev *dev)
 		return -ENOTSUP;
 	}
 
-	if (dev->dev_type == RTE_ETH_DEV_PCI) {
-		if (pci_dev->driver->drv_flags & RTE_PCI_DRV_INTR_LSC)
-			if (vtpci_irq_config(hw, 0) == VIRTIO_MSI_NO_VECTOR) {
-				PMD_DRV_LOG(ERR, "failed to set config vector");
-				return -EBUSY;
-			}
+	if (virtio_dev_check(dev, RTE_ETH_DEV_PCI, NULL, RTE_PCI_DRV_INTR_LSC)) {
+		if (vtpci_irq_config(hw, 0) == VIRTIO_MSI_NO_VECTOR) {
+			PMD_DRV_LOG(ERR, "failed to set config vector");
+			return -EBUSY;
+		}
 	}
 
 	return 0;
@@ -1283,11 +1281,11 @@ virtio_dev_start(struct rte_eth_dev *dev)
 {
 	uint16_t nb_queues, i;
 	struct virtio_hw *hw = dev->data->dev_private;
-	struct rte_pci_device *pci_dev = dev->pci_dev;
 
 	/* check if lsc interrupt feature is enabled */
 	if (dev->data->dev_conf.intr_conf.lsc) {
-		if (!(pci_dev->driver->drv_flags & RTE_PCI_DRV_INTR_LSC)) {
+		if (!virtio_dev_check(dev, RTE_ETH_DEV_PCI,
+					NULL, RTE_PCI_DRV_INTR_LSC)) {
 			PMD_DRV_LOG(ERR, "link status not supported by host");
 			return -ENOTSUP;
 		}
diff --git a/drivers/net/virtio/virtio_ethdev.h b/drivers/net/virtio/virtio_ethdev.h
index fde77ca..223b56d 100644
--- a/drivers/net/virtio/virtio_ethdev.h
+++ b/drivers/net/virtio/virtio_ethdev.h
@@ -67,6 +67,38 @@
 	 1u << VIRTIO_NET_F_MRG_RXBUF	  |	\
 	 1ULL << VIRTIO_F_VERSION_1)
 
+static inline int
+virtio_dev_check(struct rte_eth_dev *dev, enum rte_eth_dev_type type,
+		const char *name, uint64_t flags)
+{
+	int ret;
+
+	if (dev == NULL)
+		return 0;
+
+	if (type != 0)
+		ret = (dev->dev_type == type);
+	else
+		ret = 1;
+
+	if (name != 0)
+		ret &= (strncmp(dev->data->name, name, strlen(name)) == 0);
+	else
+		ret &= 1;
+
+	if (flags != 0) {
+		if (dev->dev_type == RTE_ETH_DEV_PCI)
+			ret &= (dev->pci_dev->driver->drv_flags & flags) ? 1 : 0;
+		else if (dev->dev_type == RTE_ETH_DEV_VIRTUAL)
+			ret &= (dev->data->dev_flags & flags) ? 1 : 0;
+		else
+			ret = 0;
+	} else
+		ret &= 1;
+
+	return ret;
+}
+
 /*
  * CQ function prototype
  */
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 120+ messages in thread

* [PATCH v3 3/6] EAL: Add new EAL "--range-virtaddr" option
  2016-02-10  3:40     ` [PATCH v2 2/5] EAL: Add new EAL "--qtest-virtio" option Tetsuya Mukawa
                         ` (4 preceding siblings ...)
  2016-02-22  8:17       ` [PATCH v3 2/6] vhost: Add a function to check virtio device type Tetsuya Mukawa
@ 2016-02-22  8:17       ` Tetsuya Mukawa
  2016-03-04  2:20         ` Tan, Jianfeng
                           ` (13 more replies)
  2016-02-22  8:17       ` [PATCH v3 4/6] EAL: Add a new "--align-memsize" option Tetsuya Mukawa
                         ` (2 subsequent siblings)
  8 siblings, 14 replies; 120+ messages in thread
From: Tetsuya Mukawa @ 2016-02-22  8:17 UTC (permalink / raw)
  To: dev

The option specifies how to mmap EAL memory.
If the option is specified like '--range-virtaddr=<addr1>-<addr2>',
EAL will check /proc/maps, then tries to find free region between addr1
and addr2. If a region is found, EAL will treat it as if 'base-virtaddr'
is specified. Because of this, the option will not work with
'--base-virtaddr'.

Signed-off-by: Tetsuya Mukawa <mukawa@igel.co.jp>
---
 lib/librte_eal/common/eal_common_options.c |  9 ++++
 lib/librte_eal/common/eal_internal_cfg.h   |  2 +
 lib/librte_eal/common/eal_options.h        |  2 +
 lib/librte_eal/linuxapp/eal/eal.c          | 39 ++++++++++++++
 lib/librte_eal/linuxapp/eal/eal_memory.c   | 82 +++++++++++++++++++++++++++++-
 5 files changed, 133 insertions(+), 1 deletion(-)

diff --git a/lib/librte_eal/common/eal_common_options.c b/lib/librte_eal/common/eal_common_options.c
index 65bccbd..3b4f789 100644
--- a/lib/librte_eal/common/eal_common_options.c
+++ b/lib/librte_eal/common/eal_common_options.c
@@ -74,6 +74,7 @@ eal_short_options[] =
 const struct option
 eal_long_options[] = {
 	{OPT_BASE_VIRTADDR,     1, NULL, OPT_BASE_VIRTADDR_NUM    },
+	{OPT_RANGE_VIRTADDR,    1, NULL, OPT_RANGE_VIRTADDR_NUM   },
 	{OPT_CREATE_UIO_DEV,    0, NULL, OPT_CREATE_UIO_DEV_NUM   },
 	{OPT_FILE_PREFIX,       1, NULL, OPT_FILE_PREFIX_NUM      },
 	{OPT_HELP,              0, NULL, OPT_HELP_NUM             },
@@ -137,6 +138,8 @@ eal_reset_internal_config(struct internal_config *internal_cfg)
 	for (i = 0; i < MAX_HUGEPAGE_SIZES; i++)
 		internal_cfg->hugepage_info[i].lock_descriptor = -1;
 	internal_cfg->base_virtaddr = 0;
+	internal_cfg->range_virtaddr_start = 0;
+	internal_cfg->range_virtaddr_end = 0;
 
 	internal_cfg->syslog_facility = LOG_DAEMON;
 	/* default value from build option */
@@ -985,6 +988,12 @@ eal_check_common_options(struct internal_config *internal_cfg)
 		return -1;
 	}
 
+	if (internal_cfg->base_virtaddr && internal_cfg->range_virtaddr_end) {
+		RTE_LOG(ERR, EAL, "Option --"OPT_RANGE_VIRTADDR" cannot "
+			"be specified together with --"OPT_BASE_VIRTADDR"\n");
+		return -1;
+	}
+
 	return 0;
 }
 
diff --git a/lib/librte_eal/common/eal_internal_cfg.h b/lib/librte_eal/common/eal_internal_cfg.h
index 9117ed9..0734630 100644
--- a/lib/librte_eal/common/eal_internal_cfg.h
+++ b/lib/librte_eal/common/eal_internal_cfg.h
@@ -78,6 +78,8 @@ struct internal_config {
 	volatile unsigned force_sockets;
 	volatile uint64_t socket_mem[RTE_MAX_NUMA_NODES]; /**< amount of memory per socket */
 	uintptr_t base_virtaddr;          /**< base address to try and reserve memory from */
+	uintptr_t range_virtaddr_start;   /**< start address of mappable region */
+	uintptr_t range_virtaddr_end;     /**< end address of mappable region */
 	volatile int syslog_facility;	  /**< facility passed to openlog() */
 	volatile uint32_t log_level;	  /**< default log level */
 	/** default interrupt mode for VFIO */
diff --git a/lib/librte_eal/common/eal_options.h b/lib/librte_eal/common/eal_options.h
index e5da14a..8e4cf1d 100644
--- a/lib/librte_eal/common/eal_options.h
+++ b/lib/librte_eal/common/eal_options.h
@@ -47,6 +47,8 @@ enum {
 	OPT_LONG_MIN_NUM = 256,
 #define OPT_BASE_VIRTADDR     "base-virtaddr"
 	OPT_BASE_VIRTADDR_NUM,
+#define OPT_RANGE_VIRTADDR    "range-virtaddr"
+	OPT_RANGE_VIRTADDR_NUM,
 #define OPT_CREATE_UIO_DEV    "create-uio-dev"
 	OPT_CREATE_UIO_DEV_NUM,
 #define OPT_FILE_PREFIX       "file-prefix"
diff --git a/lib/librte_eal/linuxapp/eal/eal.c b/lib/librte_eal/linuxapp/eal/eal.c
index 82f34f7..80f1995 100644
--- a/lib/librte_eal/linuxapp/eal/eal.c
+++ b/lib/librte_eal/linuxapp/eal/eal.c
@@ -444,6 +444,35 @@ eal_parse_base_virtaddr(const char *arg)
 }
 
 static int
+eal_parse_range_virtaddr(const char *range)
+{
+	char *p, *endptr;
+	uint64_t tmp_start, tmp_end;
+
+	p = strchr(range, '-');
+	if (p == NULL)
+		return -1;
+	*p++ = '\0';
+
+	errno = 0;
+	tmp_start = strtoul(range, &endptr, 0);
+	if ((errno != 0) || endptr == NULL || (*endptr != '\0'))
+		return -1;
+
+	tmp_end = strtoul(p, &endptr, 0);
+	if ((errno != 0) || endptr == NULL || (*endptr != '\0'))
+		return -1;
+
+	if (tmp_start >= tmp_end)
+		return -1;
+
+	internal_config.range_virtaddr_start = tmp_start;
+	internal_config.range_virtaddr_end = tmp_end;
+
+	return 0;
+}
+
+static int
 eal_parse_vfio_intr(const char *mode)
 {
 	unsigned i;
@@ -604,6 +633,16 @@ eal_parse_args(int argc, char **argv)
 			}
 			break;
 
+		case OPT_RANGE_VIRTADDR_NUM:
+			if (eal_parse_range_virtaddr(optarg) < 0) {
+				RTE_LOG(ERR, EAL, "invalid parameter for --"
+						OPT_RANGE_VIRTADDR "\n");
+				eal_usage(prgname);
+				ret = -1;
+				goto out;
+			}
+			break;
+
 		case OPT_VFIO_INTR_NUM:
 			if (eal_parse_vfio_intr(optarg) < 0) {
 				RTE_LOG(ERR, EAL, "invalid parameters for --"
diff --git a/lib/librte_eal/linuxapp/eal/eal_memory.c b/lib/librte_eal/linuxapp/eal/eal_memory.c
index a6b3616..d608273 100644
--- a/lib/librte_eal/linuxapp/eal/eal_memory.c
+++ b/lib/librte_eal/linuxapp/eal/eal_memory.c
@@ -251,6 +251,66 @@ aslr_enabled(void)
 }
 
 /*
+ * Find memory space that fits user request.
+ */
+static uintptr_t
+rte_eal_get_free_region(uint64_t pagesz)
+{
+	uint64_t alloc_size, start, end, next_start;
+	uint64_t low_limit, high_limit;
+	uintptr_t addr = 0;
+	char buf[1024], *p;
+	FILE *fp;
+
+	alloc_size = internal_config.memory;
+	low_limit = internal_config.range_virtaddr_start;
+	high_limit = internal_config.range_virtaddr_end;
+
+	/* allocation size should be aligned by page size */
+	if (alloc_size != RTE_ALIGN_CEIL(alloc_size, pagesz)) {
+		rte_panic("Invalid allocation size 0x%lx\n", alloc_size);
+		return NULL;
+	}
+
+	fp = fopen("/proc/self/maps", "r");
+	if (fp == NULL) {
+		rte_panic("Cannot open /proc/self/maps\n");
+		return NULL;
+	}
+
+	next_start = 0;
+	do {
+		start = next_start;
+
+		if ((p = fgets(buf, sizeof(buf), fp)) != NULL) {
+			if (sscanf(p, "%lx-%lx ", &end, &next_start) < 2)
+				break;
+
+			next_start = RTE_ALIGN_CEIL(next_start, alloc_size);
+			end = RTE_ALIGN_CEIL(end, alloc_size) - 1;
+		} else
+			end = UINT64_MAX;
+
+		if (start >= high_limit)
+			break;
+		if (end < low_limit)
+			continue;
+
+		start = RTE_MAX(start, low_limit);
+		end = RTE_MIN(end, high_limit - 1);
+
+		if (end - start >= alloc_size - 1) {
+			addr = start;
+			break;
+		}
+	} while (end != UINT64_MAX);
+
+	fclose(fp);
+
+	return addr;
+}
+
+/*
  * Try to mmap *size bytes in /dev/zero. If it is successful, return the
  * pointer to the mmap'd area and keep *size unmodified. Else, retry
  * with a smaller zone: decrease *size by hugepage_sz until it reaches
@@ -1126,6 +1186,25 @@ rte_eal_hugepage_init(void)
 	/* get pointer to global configuration */
 	mcfg = rte_eal_get_configuration()->mem_config;
 
+	if (internal_config.range_virtaddr_end) {
+		uint64_t pagesize = RTE_PGSIZE_4K;
+		struct hugepage_info *hpi;
+		unsigned n;
+		uintptr_t addr;
+
+		/* determine maximum hugepage size */
+		for (n = 0; n < internal_config.num_hugepage_sizes; n++) {
+			hpi = &internal_config.hugepage_info[n];
+			pagesize = RTE_MAX(hpi->hugepage_sz, pagesize);
+		}
+
+		addr = rte_eal_get_free_region(pagesize);
+		if (addr == 0)
+			RTE_LOG(WARNING, EAL,
+				"no free space to mmap in specified region\n");
+		internal_config.base_virtaddr = addr;
+	}
+
 	/* when hugetlbfs is disabled or single-file option is specified */
 	if (internal_config.no_hugetlbfs || internal_config.single_file) {
 		int fd;
@@ -1158,7 +1237,8 @@ rte_eal_hugepage_init(void)
 			return -1;
 		}
 
-		addr = mmap(NULL, internal_config.memory,
+		addr = mmap((void *)internal_config.base_virtaddr,
+			    internal_config.memory,
 			    PROT_READ | PROT_WRITE,
 			    MAP_SHARED | MAP_POPULATE, fd, 0);
 		if (addr == MAP_FAILED) {
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 120+ messages in thread

* [PATCH v3 4/6] EAL: Add a new "--align-memsize" option
  2016-02-10  3:40     ` [PATCH v2 2/5] EAL: Add new EAL "--qtest-virtio" option Tetsuya Mukawa
                         ` (5 preceding siblings ...)
  2016-02-22  8:17       ` [PATCH v3 3/6] EAL: Add new EAL "--range-virtaddr" option Tetsuya Mukawa
@ 2016-02-22  8:17       ` Tetsuya Mukawa
  2016-02-22  8:17       ` [PATCH v3 5/6] virtio: Add support for qtest virtio-net PMD Tetsuya Mukawa
  2016-02-22  8:17       ` [PATCH v3 6/6] docs: add release note for qtest virtio container support Tetsuya Mukawa
  8 siblings, 0 replies; 120+ messages in thread
From: Tetsuya Mukawa @ 2016-02-22  8:17 UTC (permalink / raw)
  To: dev

The option will work with "--range-virtaddr", and if the option is
specified, mapped address will be align by EAL memory size.
Such an alignment is required for using virtio-net PMD extension
on container that uses QEMU QTest framework.

Signed-off-by: Tetsuya Mukawa <mukawa@igel.co.jp>
---
 lib/librte_eal/common/eal_common_options.c | 8 ++++++++
 lib/librte_eal/common/eal_internal_cfg.h   | 1 +
 lib/librte_eal/common/eal_options.h        | 2 ++
 lib/librte_eal/linuxapp/eal/eal.c          | 4 ++++
 lib/librte_eal/linuxapp/eal/eal_memory.c   | 9 +++++++++
 5 files changed, 24 insertions(+)

diff --git a/lib/librte_eal/common/eal_common_options.c b/lib/librte_eal/common/eal_common_options.c
index 3b4f789..853420a 100644
--- a/lib/librte_eal/common/eal_common_options.c
+++ b/lib/librte_eal/common/eal_common_options.c
@@ -75,6 +75,7 @@ const struct option
 eal_long_options[] = {
 	{OPT_BASE_VIRTADDR,     1, NULL, OPT_BASE_VIRTADDR_NUM    },
 	{OPT_RANGE_VIRTADDR,    1, NULL, OPT_RANGE_VIRTADDR_NUM   },
+	{OPT_ALIGN_MEMSIZE,     0, NULL, OPT_ALIGN_MEMSIZE_NUM    },
 	{OPT_CREATE_UIO_DEV,    0, NULL, OPT_CREATE_UIO_DEV_NUM   },
 	{OPT_FILE_PREFIX,       1, NULL, OPT_FILE_PREFIX_NUM      },
 	{OPT_HELP,              0, NULL, OPT_HELP_NUM             },
@@ -140,6 +141,7 @@ eal_reset_internal_config(struct internal_config *internal_cfg)
 	internal_cfg->base_virtaddr = 0;
 	internal_cfg->range_virtaddr_start = 0;
 	internal_cfg->range_virtaddr_end = 0;
+	internal_cfg->align_memsize = 0;
 
 	internal_cfg->syslog_facility = LOG_DAEMON;
 	/* default value from build option */
@@ -994,6 +996,12 @@ eal_check_common_options(struct internal_config *internal_cfg)
 		return -1;
 	}
 
+	if (internal_cfg->range_virtaddr_end == 0 && internal_cfg->align_memsize) {
+		RTE_LOG(ERR, EAL, "Option --"OPT_RANGE_VIRTADDR" should be "
+			"specified together with --"OPT_ALIGN_MEMSIZE"\n");
+		return -1;
+	}
+
 	return 0;
 }
 
diff --git a/lib/librte_eal/common/eal_internal_cfg.h b/lib/librte_eal/common/eal_internal_cfg.h
index 0734630..df33a9f 100644
--- a/lib/librte_eal/common/eal_internal_cfg.h
+++ b/lib/librte_eal/common/eal_internal_cfg.h
@@ -80,6 +80,7 @@ struct internal_config {
 	uintptr_t base_virtaddr;          /**< base address to try and reserve memory from */
 	uintptr_t range_virtaddr_start;   /**< start address of mappable region */
 	uintptr_t range_virtaddr_end;     /**< end address of mappable region */
+	volatile unsigned align_memsize;  /**< true to align virtaddr by memory size */
 	volatile int syslog_facility;	  /**< facility passed to openlog() */
 	volatile uint32_t log_level;	  /**< default log level */
 	/** default interrupt mode for VFIO */
diff --git a/lib/librte_eal/common/eal_options.h b/lib/librte_eal/common/eal_options.h
index 8e4cf1d..9e36f68 100644
--- a/lib/librte_eal/common/eal_options.h
+++ b/lib/librte_eal/common/eal_options.h
@@ -49,6 +49,8 @@ enum {
 	OPT_BASE_VIRTADDR_NUM,
 #define OPT_RANGE_VIRTADDR    "range-virtaddr"
 	OPT_RANGE_VIRTADDR_NUM,
+#define OPT_ALIGN_MEMSIZE     "align-memsize"
+	OPT_ALIGN_MEMSIZE_NUM,
 #define OPT_CREATE_UIO_DEV    "create-uio-dev"
 	OPT_CREATE_UIO_DEV_NUM,
 #define OPT_FILE_PREFIX       "file-prefix"
diff --git a/lib/librte_eal/linuxapp/eal/eal.c b/lib/librte_eal/linuxapp/eal/eal.c
index 80f1995..095e866 100644
--- a/lib/librte_eal/linuxapp/eal/eal.c
+++ b/lib/librte_eal/linuxapp/eal/eal.c
@@ -643,6 +643,10 @@ eal_parse_args(int argc, char **argv)
 			}
 			break;
 
+		case OPT_ALIGN_MEMSIZE_NUM:
+			internal_config.align_memsize = 1;
+			break;
+
 		case OPT_VFIO_INTR_NUM:
 			if (eal_parse_vfio_intr(optarg) < 0) {
 				RTE_LOG(ERR, EAL, "invalid parameters for --"
diff --git a/lib/librte_eal/linuxapp/eal/eal_memory.c b/lib/librte_eal/linuxapp/eal/eal_memory.c
index d608273..221c358 100644
--- a/lib/librte_eal/linuxapp/eal/eal_memory.c
+++ b/lib/librte_eal/linuxapp/eal/eal_memory.c
@@ -272,6 +272,15 @@ rte_eal_get_free_region(uint64_t pagesz)
 		return NULL;
 	}
 
+	if (internal_config.align_memsize) {
+		/*
+		 * Typically, BAR register of PCI device requiers such
+		 * an alignment.
+		 */
+		low_limit = RTE_ALIGN_CEIL(low_limit, alloc_size);
+		high_limit = RTE_ALIGN_FLOOR(high_limit, alloc_size);
+	}
+
 	fp = fopen("/proc/self/maps", "r");
 	if (fp == NULL) {
 		rte_panic("Cannot open /proc/self/maps\n");
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 120+ messages in thread

* [PATCH v3 5/6] virtio: Add support for qtest virtio-net PMD
  2016-02-10  3:40     ` [PATCH v2 2/5] EAL: Add new EAL "--qtest-virtio" option Tetsuya Mukawa
                         ` (6 preceding siblings ...)
  2016-02-22  8:17       ` [PATCH v3 4/6] EAL: Add a new "--align-memsize" option Tetsuya Mukawa
@ 2016-02-22  8:17       ` Tetsuya Mukawa
  2016-03-04  2:18         ` Tan, Jianfeng
  2016-02-22  8:17       ` [PATCH v3 6/6] docs: add release note for qtest virtio container support Tetsuya Mukawa
  8 siblings, 1 reply; 120+ messages in thread
From: Tetsuya Mukawa @ 2016-02-22  8:17 UTC (permalink / raw)
  To: dev

The patch adds a new virtio-net PMD configuration that allows the PMD to
work on host as if the PMD is in VM.
Here is new configuration for virtio-net PMD.
 - CONFIG_RTE_VIRTIO_VDEV_QTEST
To use this mode, EAL needs map all hugepages as one file. Also the file
should be mapped between (1 << 31) and (1 << 44). And start address
should be aligned by EAL memory size.

To allocate like above, use below options.
 --single-file
 --range-virtaddr=0x80000000-0x100000000000
 --align-memsize
If a free regions isn't found, EAL will return error.

To prepare virtio-net device on host, the users need to invoke QEMU
process in special qtest mode. This mode is mainly used for testing QEMU
devices from outer process. In this mode, no guest runs.
Here is QEMU command line.

 $ qemu-system-x86_64 \
     -machine pc-i440fx-1.4,accel=qtest \
     -display none -qtest-log /dev/null \
     -qtest unix:/tmp/socket,server \
     -netdev type=tap,script=/etc/qemu-ifup,id=net0,queues=1 \
     -device virtio-net-pci,netdev=net0,mq=on,disable-modern=false,addr=3 \
     -chardev socket,id=chr1,path=/tmp/ivshmem,server \
     -device ivshmem,size=1G,chardev=chr1,vectors=1,addr=4

 * Should use qemu-2.5.1, or above.
 * QEMU process is needed per port.
 * virtio-1.0 device are only supported.
 * The vhost backends like vhost-net and vhost-user can be specified.
 * In most cases, just using above command is enough, but you can also
   specify other QEMU virtio-net options.
 * Only checked "pc-i440fx-1.4" machine, but may work with other
   machines.
 * Should not add "--enable-kvm" to QEMU command line.

After invoking QEMU, the PMD can connect to QEMU process using unix
domain sockets. Over these sockets, virtio-net, ivshmem and piix3
device in QEMU are probed by the PMD.
Here is example of command line.

 $ testpmd -c f -n 1 -m 1024 --no-pci --single-file \
      --range-virtaddr=0x80000000-0x100000000000 --align-memsize \
      --vdev="eth_qtest_virtio0,qtest=/tmp/socket,ivshmem=/tmp/ivshmem"\
      -- --disable-hw-vlan --txqflags=0xf00 -i

Please specify same unix domain sockets and memory size in both QEMU
and DPDK command lines like above.
The share memory size should be power of 2, because ivshmem only
accepts such memory size.

Signed-off-by: Tetsuya Mukawa <mukawa@igel.co.jp>
---
 config/common_linuxapp             |    1 +
 drivers/net/virtio/Makefile        |    4 +
 drivers/net/virtio/qtest.c         | 1342 ++++++++++++++++++++++++++++++++++++
 drivers/net/virtio/qtest.h         |   65 ++
 drivers/net/virtio/virtio_ethdev.c |  383 +++++++++-
 drivers/net/virtio/virtio_pci.c    |  364 +++++++++-
 drivers/net/virtio/virtio_pci.h    |    5 +-
 7 files changed, 2122 insertions(+), 42 deletions(-)
 create mode 100644 drivers/net/virtio/qtest.c
 create mode 100644 drivers/net/virtio/qtest.h

diff --git a/config/common_linuxapp b/config/common_linuxapp
index 452f39c..f6e53bc 100644
--- a/config/common_linuxapp
+++ b/config/common_linuxapp
@@ -533,3 +533,4 @@ CONFIG_RTE_TEST_PMD_RECORD_BURST_STATS=n
 # Enable virtio support for container
 #
 CONFIG_RTE_VIRTIO_VDEV=y
+CONFIG_RTE_VIRTIO_VDEV_QTEST=y
diff --git a/drivers/net/virtio/Makefile b/drivers/net/virtio/Makefile
index ef920f9..6c11378 100644
--- a/drivers/net/virtio/Makefile
+++ b/drivers/net/virtio/Makefile
@@ -56,6 +56,10 @@ ifeq ($(CONFIG_RTE_VIRTIO_VDEV),y)
 	SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += vhost_embedded.c
 endif
 
+ifeq ($(CONFIG_RTE_VIRTIO_VDEV_QTEST),y)
+	SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += qtest.c
+endif
+
 # this lib depends upon:
 DEPDIRS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += lib/librte_eal lib/librte_ether
 DEPDIRS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += lib/librte_mempool lib/librte_mbuf
diff --git a/drivers/net/virtio/qtest.c b/drivers/net/virtio/qtest.c
new file mode 100644
index 0000000..061aab5
--- /dev/null
+++ b/drivers/net/virtio/qtest.c
@@ -0,0 +1,1342 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2016 IGEL Co., Ltd. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of IGEL Co., Ltd. nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <sys/queue.h>
+#include <signal.h>
+#include <pthread.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <sys/eventfd.h>
+#include <linux/pci_regs.h>
+
+#include <rte_memory.h>
+#include <rte_malloc.h>
+#include <rte_common.h>
+#include <rte_interrupts.h>
+
+#include "virtio_pci.h"
+#include "virtio_logs.h"
+#include "virtio_ethdev.h"
+#include "qtest.h"
+
+#define NB_BAR                          6
+
+/* PIIX3 configuration registers */
+#define PIIX3_REG_ADDR_PIRQA            0x60
+#define PIIX3_REG_ADDR_PIRQB            0x61
+#define PIIX3_REG_ADDR_PIRQC            0x62
+#define PIIX3_REG_ADDR_PIRQD            0x63
+
+/* Device information */
+#define VIRTIO_NET_DEVICE_ID            0x1000
+#define VIRTIO_NET_VENDOR_ID            0x1af4
+#define VIRTIO_NET_IRQ_NUM              10
+#define IVSHMEM_DEVICE_ID               0x1110
+#define IVSHMEM_VENDOR_ID               0x1af4
+#define IVSHMEM_PROTOCOL_VERSION        0
+#define PIIX3_DEVICE_ID                 0x7000
+#define PIIX3_VENDOR_ID                 0x8086
+
+/* ------------------------------------------------------------
+ * IO port mapping of qtest guest
+ * ------------------------------------------------------------
+ * 0x0000 - 0xbfff : not used
+ * 0xc000 - 0xc03f : virtio-net(BAR0)
+ * 0xc040 - 0xffff : not used
+ *
+ * ------------------------------------------------------------
+ * Memory mapping of qtest quest
+ * ------------------------------------------------------------
+ * 0x00000000_00000000 - 0x00000000_3fffffff : not used
+ * 0x00000000_40000000 - 0x00000000_40000fff : virtio-net(BAR1)
+ * 0x00000000_40001000 - 0x00000000_40ffffff : not used
+ * 0x00000000_41000000 - 0x00000000_417fffff : virtio-net(BAR4)
+ * 0x00000000_41800000 - 0x00000000_41ffffff : not used
+ * 0x00000000_42000000 - 0x00000000_420000ff : ivshmem(BAR0)
+ * 0x00000000_42000100 - 0x00000000_42ffffff : not used
+ * 0x00000000_80000000 - 0xffffffff_ffffffff : ivshmem(BAR2)
+ *
+ * We can only specify start address of a region. The region size
+ * will be defined by the device implementation in QEMU.
+ * The size will be pow of 2 according to the PCI specification.
+ * Also, the region start address should be aligned by region size.
+ *
+ * BAR2 of ivshmem will be used to mmap DPDK application memory.
+ * So this address will be dynamically changed, but not to overlap
+ * others, it should be mmaped between above addresses. Such allocation
+ * is done by EAL. Check rte_eal_get_free_region() also.
+ */
+#define VIRTIO_NET_IO_START             0xc000
+#define VIRTIO_NET_MEMORY1_START	0x40000000
+#define VIRTIO_NET_MEMORY2_START	0x41000000
+#define IVSHMEM_MEMORY_START            0x42000000
+
+#define PCI_CONFIG_ADDR(_bus, _device, _function, _offset) ( \
+	(1 << 31) | ((_bus) & 0xff) << 16 | ((_device) & 0x1f) << 11 | \
+	((_function) & 0x7) << 8 | ((_offset) & 0xfc))
+
+static char interrupt_message[32];
+
+enum qtest_pci_bar_type {
+	QTEST_PCI_BAR_DISABLE = 0,
+	QTEST_PCI_BAR_IO,
+	QTEST_PCI_BAR_MEMORY_UNDER_1MB,
+	QTEST_PCI_BAR_MEMORY_32,
+	QTEST_PCI_BAR_MEMORY_64
+};
+
+struct qtest_pci_bar {
+	enum qtest_pci_bar_type type;
+	uint8_t addr;
+	uint64_t region_start;
+	uint64_t region_size;
+};
+
+struct qtest_session;
+TAILQ_HEAD(qtest_pci_device_list, qtest_pci_device);
+struct qtest_pci_device {
+	TAILQ_ENTRY(qtest_pci_device) next;
+	const char *name;
+	uint16_t device_id;
+	uint16_t vendor_id;
+	uint8_t bus_addr;
+	uint8_t device_addr;
+	struct qtest_pci_bar bar[NB_BAR];
+	int (*init)(struct qtest_session *s, struct qtest_pci_device *dev);
+};
+
+union qtest_pipefds {
+	struct {
+		int pipefd[2];
+	};
+	struct {
+		int readfd;
+		int writefd;
+	};
+};
+
+struct qtest_session {
+	int qtest_socket;
+	pthread_mutex_t qtest_session_lock;
+
+	struct qtest_pci_device_list head;
+	int ivshmem_socket;
+
+	pthread_t event_th;
+	char *evq;
+	char *evq_dequeue_ptr;
+	size_t evq_total_len;
+
+	union qtest_pipefds msgfds;
+
+	pthread_t intr_th;
+	int eventfd;
+	rte_atomic16_t enable_intr;
+	rte_intr_callback_fn cb;
+	void *cb_arg;
+	struct rte_eth_dev_data *eth_data;
+};
+
+static int
+qtest_raw_send(int fd, char *buf, size_t count)
+{
+	size_t len = count;
+	size_t total_len = 0;
+	int ret = 0;
+
+	while (len > 0) {
+		ret = write(fd, buf, len);
+		if (ret == -1) {
+			if (errno == EINTR)
+				continue;
+			return ret;
+		}
+		if (ret == (int)len)
+			break;
+		total_len += ret;
+		buf += ret;
+		len -= ret;
+	}
+	return total_len + ret;
+}
+
+static int
+qtest_raw_recv(int fd, char *buf, size_t count)
+{
+	size_t len = count;
+	size_t total_len = 0;
+	int ret = 0;
+
+	while (len > 0) {
+		ret = read(fd, buf, len);
+		if (ret <= 0) {
+			if (errno == EINTR) {
+				continue;
+			}
+			return ret;
+		}
+		if (ret == (int)len)
+			break;
+		if (*(buf + ret - 1) == '\n')
+			break;
+		total_len += ret;
+		buf += ret;
+		len -= ret;
+	}
+	return total_len + ret;
+}
+
+/*
+ * To know QTest protocol specification, see below QEMU source code.
+ *  - qemu/qtest.c
+ * If qtest socket is closed, qtest_raw_in and qtest_raw_read will return 0.
+ */
+static uint32_t
+qtest_raw_in(struct qtest_session *s, uint16_t addr, char type)
+{
+	char buf[64];
+	int ret;
+
+	if ((type != 'l') && (type != 'w') && (type != 'b'))
+		rte_panic("Invalid value\n");
+
+	snprintf(buf, sizeof(buf), "in%c 0x%x\n", type, addr);
+	/* write to qtest socket */
+	ret = qtest_raw_send(s->qtest_socket, buf, strlen(buf));
+	/* read reply from event handler */
+	ret = qtest_raw_recv(s->msgfds.readfd, buf, sizeof(buf));
+	if (ret < 0)
+		return 0;
+
+	buf[ret] = '\0';
+	return strtoul(buf + strlen("OK "), NULL, 16);
+}
+
+static void
+qtest_raw_out(struct qtest_session *s, uint16_t addr, uint32_t val, char type)
+{
+	char buf[64];
+	int ret __rte_unused;
+
+	if ((type != 'l') && (type != 'w') && (type != 'b'))
+		rte_panic("Invalid value\n");
+
+	snprintf(buf, sizeof(buf), "out%c 0x%x 0x%x\n", type, addr, val);
+	/* write to qtest socket */
+	ret = qtest_raw_send(s->qtest_socket, buf, strlen(buf));
+	/* read reply from event handler */
+	ret = qtest_raw_recv(s->msgfds.readfd, buf, sizeof(buf));
+}
+
+static uint32_t
+qtest_raw_read(struct qtest_session *s, uint64_t addr, char type)
+{
+	char buf[64];
+	int ret;
+
+	if ((type != 'l') && (type != 'w') && (type != 'b'))
+		rte_panic("Invalid value\n");
+
+	snprintf(buf, sizeof(buf), "read%c 0x%lx\n", type, addr);
+	/* write to qtest socket */
+	ret = qtest_raw_send(s->qtest_socket, buf, strlen(buf));
+	/* read reply from event handler */
+	ret = qtest_raw_recv(s->msgfds.readfd, buf, sizeof(buf));
+	if (ret < 0)
+		return 0;
+
+	buf[ret] = '\0';
+	return strtoul(buf + strlen("OK "), NULL, 16);
+}
+
+static void
+qtest_raw_write(struct qtest_session *s, uint64_t addr, uint32_t val, char type)
+{
+	char buf[64];
+	int ret __rte_unused;
+
+	if ((type != 'l') && (type != 'w') && (type != 'b'))
+		rte_panic("Invalid value\n");
+
+	snprintf(buf, sizeof(buf), "write%c 0x%lx 0x%x\n", type, addr, val);
+	/* write to qtest socket */
+	ret = qtest_raw_send(s->qtest_socket, buf, strlen(buf));
+	/* read reply from event handler */
+	ret = qtest_raw_recv(s->msgfds.readfd, buf, sizeof(buf));
+}
+
+/*
+ * qtest_pci_inX/outX are used for accessing PCI configuration space.
+ * The functions are implemented based on PCI configuration space
+ * specification.
+ * Accroding to the spec, access size of read()/write() should be 4 bytes.
+ */
+static int
+qtest_pci_inb(struct qtest_session *s, uint8_t bus, uint8_t device,
+		uint8_t function, uint8_t offset)
+{
+	uint32_t tmp;
+
+	tmp = PCI_CONFIG_ADDR(bus, device, function, offset);
+
+	if (pthread_mutex_lock(&s->qtest_session_lock) < 0)
+		rte_panic("Cannot lock mutex\n");
+
+	qtest_raw_out(s, 0xcf8, tmp, 'l');
+	tmp = qtest_raw_in(s, 0xcfc, 'l');
+
+	if (pthread_mutex_unlock(&s->qtest_session_lock) < 0)
+		rte_panic("Cannot unlock mutex\n");
+
+	return (tmp >> ((offset & 0x3) * 8)) & 0xff;
+}
+
+static void
+qtest_pci_outb(struct qtest_session *s, uint8_t bus, uint8_t device,
+		uint8_t function, uint8_t offset, uint8_t value)
+{
+	uint32_t addr, tmp, pos;
+
+	addr = PCI_CONFIG_ADDR(bus, device, function, offset);
+	pos = (offset % 4) * 8;
+
+	if (pthread_mutex_lock(&s->qtest_session_lock) < 0)
+		rte_panic("Cannot lock mutex\n");
+
+	qtest_raw_out(s, 0xcf8, addr, 'l');
+	tmp = qtest_raw_in(s, 0xcfc, 'l');
+	tmp = (tmp & ~(0xff << pos)) | (value << pos);
+
+	qtest_raw_out(s, 0xcf8, addr, 'l');
+	qtest_raw_out(s, 0xcfc, tmp, 'l');
+
+	if (pthread_mutex_unlock(&s->qtest_session_lock) < 0)
+		rte_panic("Cannot unlock mutex\n");
+}
+
+static uint32_t
+qtest_pci_inl(struct qtest_session *s, uint8_t bus, uint8_t device,
+		uint8_t function, uint8_t offset)
+{
+	uint32_t tmp;
+
+	tmp = PCI_CONFIG_ADDR(bus, device, function, offset);
+
+	if (pthread_mutex_lock(&s->qtest_session_lock) < 0)
+		rte_panic("Cannot lock mutex\n");
+
+	qtest_raw_out(s, 0xcf8, tmp, 'l');
+	tmp = qtest_raw_in(s, 0xcfc, 'l');
+
+	if (pthread_mutex_unlock(&s->qtest_session_lock) < 0)
+		rte_panic("Cannot unlock mutex\n");
+
+	return tmp;
+}
+
+static void
+qtest_pci_outl(struct qtest_session *s, uint8_t bus, uint8_t device,
+		uint8_t function, uint8_t offset, uint32_t value)
+{
+	uint32_t tmp;
+
+	tmp = PCI_CONFIG_ADDR(bus, device, function, offset);
+
+	if (pthread_mutex_lock(&s->qtest_session_lock) < 0)
+		rte_panic("Cannot lock mutex\n");
+
+	qtest_raw_out(s, 0xcf8, tmp, 'l');
+	qtest_raw_out(s, 0xcfc, value, 'l');
+
+	if (pthread_mutex_unlock(&s->qtest_session_lock) < 0)
+		rte_panic("Cannot unlock mutex\n");
+}
+
+static uint64_t
+qtest_pci_inq(struct qtest_session *s, uint8_t bus, uint8_t device,
+		uint8_t function, uint8_t offset)
+{
+	uint32_t tmp;
+	uint64_t val;
+
+	tmp = PCI_CONFIG_ADDR(bus, device, function, offset);
+
+	if (pthread_mutex_lock(&s->qtest_session_lock) < 0)
+		rte_panic("Cannot lock mutex\n");
+
+	qtest_raw_out(s, 0xcf8, tmp, 'l');
+	val = (uint64_t)qtest_raw_in(s, 0xcfc, 'l');
+
+	tmp = PCI_CONFIG_ADDR(bus, device, function, offset + 4);
+
+	qtest_raw_out(s, 0xcf8, tmp, 'l');
+	val |= (uint64_t)qtest_raw_in(s, 0xcfc, 'l') << 32;
+
+	if (pthread_mutex_unlock(&s->qtest_session_lock) < 0)
+		rte_panic("Cannot unlock mutex\n");
+
+	return val;
+}
+
+static void
+qtest_pci_outq(struct qtest_session *s, uint8_t bus, uint8_t device,
+		uint8_t function, uint8_t offset, uint64_t value)
+{
+	uint32_t tmp;
+
+	tmp = PCI_CONFIG_ADDR(bus, device, function, offset);
+
+	if (pthread_mutex_lock(&s->qtest_session_lock) < 0)
+		rte_panic("Cannot lock mutex\n");
+
+	qtest_raw_out(s, 0xcf8, tmp, 'l');
+	qtest_raw_out(s, 0xcfc, (uint32_t)(value & 0xffffffff), 'l');
+
+	tmp = PCI_CONFIG_ADDR(bus, device, function, offset + 4);
+
+	qtest_raw_out(s, 0xcf8, tmp, 'l');
+	qtest_raw_out(s, 0xcfc, (uint32_t)(value >> 32), 'l');
+
+	if (pthread_mutex_unlock(&s->qtest_session_lock) < 0)
+		rte_panic("Cannot unlock mutex\n");
+}
+
+/*
+ * qtest_in/out are used for accessing ioport of qemu guest.
+ * qtest_read/write are used for accessing memory of qemu guest.
+ */
+uint32_t
+qtest_in(struct virtio_hw *hw, uint16_t addr, char type)
+{
+	struct qtest_session *s = (struct qtest_session *)hw->qsession;
+	uint32_t val;
+
+	if (pthread_mutex_lock(&s->qtest_session_lock) < 0)
+		rte_panic("Cannot lock mutex\n");
+
+	val = qtest_raw_in(s, addr, type);
+
+	if (pthread_mutex_unlock(&s->qtest_session_lock) < 0)
+		rte_panic("Cannot lock mutex\n");
+
+	return val;
+}
+
+void
+qtest_out(struct virtio_hw *hw, uint16_t addr, uint64_t val, char type)
+{
+	struct qtest_session *s = (struct qtest_session *)hw->qsession;
+
+	if (pthread_mutex_lock(&s->qtest_session_lock) < 0)
+		rte_panic("Cannot lock mutex\n");
+
+	qtest_raw_out(s, addr, val, type);
+
+	if (pthread_mutex_unlock(&s->qtest_session_lock) < 0)
+		rte_panic("Cannot lock mutex\n");
+}
+
+uint32_t
+qtest_read(struct virtio_hw *hw, uint64_t addr, char type)
+{
+	struct qtest_session *s = (struct qtest_session *)hw->qsession;
+	uint32_t val;
+
+	if (pthread_mutex_lock(&s->qtest_session_lock) < 0)
+		rte_panic("Cannot lock mutex\n");
+
+	val = qtest_raw_read(s, addr, type);
+
+	if (pthread_mutex_unlock(&s->qtest_session_lock) < 0)
+		rte_panic("Cannot lock mutex\n");
+
+	return val;
+}
+
+void
+qtest_write(struct virtio_hw *hw, uint64_t addr, uint64_t val, char type)
+{
+	struct qtest_session *s = (struct qtest_session *)hw->qsession;
+
+	if (pthread_mutex_lock(&s->qtest_session_lock) < 0)
+		rte_panic("Cannot lock mutex\n");
+
+	qtest_raw_write(s, addr, val, type);
+
+	if (pthread_mutex_unlock(&s->qtest_session_lock) < 0)
+		rte_panic("Cannot lock mutex\n");
+}
+
+static struct qtest_pci_device *
+qtest_find_device(struct qtest_session *s, const char *name)
+{
+	struct qtest_pci_device *dev;
+
+	TAILQ_FOREACH(dev, &s->head, next) {
+		if (strcmp(dev->name, name) == 0)
+			return dev;
+	}
+	return NULL;
+}
+
+/*
+ * The function is used for reading pci configuration space of specifed device.
+ */
+int
+qtest_read_pci_cfg(struct virtio_hw *hw, const char *name,
+		void *buf, size_t len, off_t offset)
+{
+	struct qtest_session *s = (struct qtest_session *)hw->qsession;
+	struct qtest_pci_device *dev;
+	uint32_t i;
+	uint8_t *p = buf;
+
+	dev = qtest_find_device(s, name);
+	if (dev == NULL) {
+		PMD_DRV_LOG(ERR, "Cannot find specified device: %s\n", name);
+		return -1;
+	}
+
+	for (i = 0; i < len; i++) {
+		*(p + i) = qtest_pci_inb(s,
+				dev->bus_addr, dev->device_addr, 0, offset + i);
+	}
+
+	return 0;
+}
+
+static struct qtest_pci_bar *
+qtest_get_bar(struct virtio_hw *hw, const char *name, uint8_t bar)
+{
+	struct qtest_session *s = (struct qtest_session *)hw->qsession;
+	struct qtest_pci_device *dev;
+
+	if (bar >= NB_BAR) {
+		PMD_DRV_LOG(ERR, "Invalid bar is specified: %u\n", bar);
+		return NULL;
+	}
+
+	dev = qtest_find_device(s, name);
+	if (dev == NULL) {
+		PMD_DRV_LOG(ERR, "Cannot find specified device: %s\n", name);
+		return NULL;
+	}
+
+	if (dev->bar[bar].type == QTEST_PCI_BAR_DISABLE) {
+		PMD_DRV_LOG(ERR, "Cannot find valid BAR(%s): %u\n", name, bar);
+		return NULL;
+	}
+
+	return &dev->bar[bar];
+}
+
+int
+qtest_get_bar_addr(struct virtio_hw *hw, const char *name,
+		uint8_t bar, uint64_t **addr)
+{
+	struct qtest_pci_bar *bar_ptr;
+
+	bar_ptr = qtest_get_bar(hw, name, bar);
+	if (bar_ptr == NULL)
+		return -1;
+
+	*addr = (uint64_t *)bar_ptr->region_start;
+	return 0;
+}
+
+int
+qtest_get_bar_size(struct virtio_hw *hw, const char *name,
+		uint8_t bar, uint64_t *size)
+{
+	struct qtest_pci_bar *bar_ptr;
+
+	bar_ptr = qtest_get_bar(hw, name, bar);
+	if (bar_ptr == NULL)
+		return -1;
+
+	*size = bar_ptr->region_size;
+	return 0;
+}
+
+int
+qtest_intr_enable(void *data)
+{
+	struct virtio_hw *hw = ((struct rte_eth_dev_data *)data)->dev_private;
+	struct qtest_session *s;
+
+	s = (struct qtest_session *)hw->qsession;
+	rte_atomic16_set(&s->enable_intr, 1);
+
+	return 0;
+}
+
+int
+qtest_intr_disable(void *data)
+{
+	struct virtio_hw *hw = ((struct rte_eth_dev_data *)data)->dev_private;
+	struct qtest_session *s;
+
+	s = (struct qtest_session *)hw->qsession;
+	rte_atomic16_set(&s->enable_intr, 0);
+
+	return 0;
+}
+
+void
+qtest_intr_callback_register(void *data,
+		rte_intr_callback_fn cb, void *cb_arg)
+{
+	struct virtio_hw *hw = ((struct rte_eth_dev_data *)data)->dev_private;
+	struct qtest_session *s;
+
+	s = (struct qtest_session *)hw->qsession;
+	s->cb = cb;
+	s->cb_arg = cb_arg;
+	rte_atomic16_set(&s->enable_intr, 1);
+}
+
+void
+qtest_intr_callback_unregister(void *data,
+		rte_intr_callback_fn cb __rte_unused,
+		void *cb_arg __rte_unused)
+{
+	struct virtio_hw *hw = ((struct rte_eth_dev_data *)data)->dev_private;
+	struct qtest_session *s;
+
+	s = (struct qtest_session *)hw->qsession;
+	rte_atomic16_set(&s->enable_intr, 0);
+	s->cb = NULL;
+	s->cb_arg = NULL;
+}
+
+static void *
+qtest_intr_handler(void *data) {
+	struct qtest_session *s = (struct qtest_session *)data;
+	eventfd_t value;
+	int ret;
+
+	for (;;) {
+		ret = eventfd_read(s->eventfd, &value);
+		if (ret < 0)
+			return NULL;
+		s->cb(NULL, s->cb_arg);
+	}
+	return NULL;
+}
+
+static int
+qtest_intr_initialize(void *data)
+{
+	struct virtio_hw *hw = ((struct rte_eth_dev_data *)data)->dev_private;
+	struct qtest_session *s;
+	char buf[64];
+	int ret;
+
+	s = (struct qtest_session *)hw->qsession;
+
+	/* This message will come when interrupt occurs */
+	snprintf(interrupt_message, sizeof(interrupt_message),
+			"IRQ raise %d", VIRTIO_NET_IRQ_NUM);
+
+	snprintf(buf, sizeof(buf), "irq_intercept_in ioapic\n");
+
+	if (pthread_mutex_lock(&s->qtest_session_lock) < 0)
+		rte_panic("Cannot lock mutex\n");
+
+	/* To enable interrupt, send "irq_intercept_in" message to QEMU */
+	ret = qtest_raw_send(s->qtest_socket, buf, strlen(buf));
+	if (ret < 0) {
+		pthread_mutex_unlock(&s->qtest_session_lock);
+		return -1;
+	}
+
+	/* just ignore QEMU response */
+	ret = qtest_raw_recv(s->msgfds.readfd, buf, sizeof(buf));
+	if (ret < 0) {
+		pthread_mutex_unlock(&s->qtest_session_lock);
+		return -1;
+	}
+
+	if (pthread_mutex_unlock(&s->qtest_session_lock) < 0)
+		rte_panic("Cannot lock mutex\n");
+
+	return 0;
+}
+
+static void
+qtest_event_send(struct qtest_session *s, char *buf)
+{
+	int ret;
+
+	if (strncmp(buf, interrupt_message, strlen(interrupt_message)) == 0) {
+		if (rte_atomic16_read(&s->enable_intr) == 0)
+			return;
+
+		/* relay interrupt to eventfd */
+		ret = eventfd_write(s->eventfd, 1);
+		if (ret < 0)
+			rte_panic("cannot relay interrupt\n");
+	} else {
+		/* relay normal message to pipe */
+		ret = qtest_raw_send(s->msgfds.writefd, buf, strlen(buf));
+		if (ret < 0)
+			rte_panic("cannot relay normal message\n");
+	}
+}
+
+static void
+qtest_close_one_socket(int *fd)
+{
+	if (*fd > 0) {
+		close(*fd);
+		*fd = -1;
+	}
+}
+
+static void
+qtest_close_sockets(struct qtest_session *s)
+{
+	qtest_close_one_socket(&s->qtest_socket);
+	qtest_close_one_socket(&s->msgfds.readfd);
+	qtest_close_one_socket(&s->msgfds.writefd);
+	qtest_close_one_socket(&s->eventfd);
+	qtest_close_one_socket(&s->ivshmem_socket);
+}
+
+static void
+qtest_event_enqueue(struct qtest_session *s, char *buf)
+{
+	size_t len = strlen(buf);
+	char *dest;
+
+	if (s->evq == NULL) {
+		/* allocate one more byte for '\0' */
+		s->evq = malloc(len + 1);
+		if (s->evq == NULL)
+			rte_panic("Cannot allocate memory\n");
+
+		s->evq_dequeue_ptr = s->evq;
+		s->evq_total_len = len + 1;
+		dest = s->evq;
+	} else {
+		size_t offset = s->evq_dequeue_ptr - s->evq;
+
+		s->evq = realloc(s->evq, s->evq_total_len + len);
+		if (s->evq == NULL)
+			rte_panic("Cannot re-allocate memory\n");
+
+		s->evq_dequeue_ptr = s->evq + offset;
+		dest = s->evq + s->evq_total_len - 1;
+		s->evq_total_len += len;
+	}
+
+	strncpy(dest, buf, len);
+	dest[len] = '\0';
+}
+
+static char *
+qtest_event_dequeue(struct qtest_session *s)
+{
+	char *head, *next_head;
+
+	head = s->evq_dequeue_ptr;
+
+	/* make sure message is terminated by '\n' */
+	next_head = strchr(s->evq_dequeue_ptr, '\n');
+	if (next_head == NULL)
+		return NULL;
+
+	/* set next dequeue pointer */
+	s->evq_dequeue_ptr = next_head + 1;
+
+	return head;
+}
+
+static void
+qtest_event_flush(struct qtest_session *s)
+{
+	if (s->evq) {
+		free(s->evq);
+		s->evq = NULL;
+		s->evq_dequeue_ptr = NULL;
+		s->evq_total_len = 0;
+	}
+}
+
+/*
+ * This thread relays QTest response using pipe and eventfd.
+ * The function is needed because we need to separate IRQ message from others.
+ */
+static void *
+qtest_event_handler(void *data) {
+	struct qtest_session *s = (struct qtest_session *)data;
+	char buf[64];
+	char *p;
+	int ret;
+
+	for (;;) {
+		memset(buf, 0, sizeof(buf));
+		ret = qtest_raw_recv(s->qtest_socket, buf, sizeof(buf));
+		if (ret <= 0) {
+			PMD_DRV_LOG(EMERG,
+				"Port %u: qtest connection was closed.\n"
+				"Please detach the port, then start QEMU "
+				"and attach the port again.\n",
+				s->eth_data->port_id);
+			qtest_close_sockets(s);
+			qtest_event_flush(s);
+			return NULL;
+		}
+
+		qtest_event_enqueue(s, buf);
+
+		/* in the case of incomplete message, receive again */
+		p = &buf[sizeof(buf) - 1];
+		if ((*p != '\0') && (*p != '\n'))
+			continue;
+
+		/* may receive multiple messages at the same time */
+		while ((p = qtest_event_dequeue(s)) != NULL)
+			qtest_event_send(s, p);
+
+		qtest_event_flush(s);
+	}
+	return NULL;
+}
+
+static int
+qtest_init_piix3_device(struct qtest_session *s, struct qtest_pci_device *dev)
+{
+	uint8_t bus, device, virtio_net_slot = 0;
+	struct qtest_pci_device *tmpdev;
+	uint8_t pcislot2regaddr[] = {	0xff,
+					0xff,
+					0xff,
+					PIIX3_REG_ADDR_PIRQC,
+					PIIX3_REG_ADDR_PIRQD,
+					PIIX3_REG_ADDR_PIRQA,
+					PIIX3_REG_ADDR_PIRQB};
+
+	bus = dev->bus_addr;
+	device = dev->device_addr;
+
+	PMD_DRV_LOG(INFO,
+		"Find %s on virtual PCI bus: %04x:%02x:00.0\n",
+		dev->name, bus, device);
+
+	/* Get slot id that is connected to virtio-net */
+	TAILQ_FOREACH(tmpdev, &s->head, next) {
+		if (strcmp(tmpdev->name, "virtio-net") == 0) {
+			virtio_net_slot = tmpdev->device_addr;
+			break;
+		}
+	}
+
+	if (virtio_net_slot == 0)
+		return -1;
+
+	/*
+	 * Set interrupt routing for virtio-net device.
+	 * Here is i440fx/piix3 connection settings
+	 * ---------------------------------------
+	 * PCI Slot3 -> PIRQC
+	 * PCI Slot4 -> PIRQD
+	 * PCI Slot5 -> PIRQA
+	 * PCI Slot6 -> PIRQB
+	 */
+	if (pcislot2regaddr[virtio_net_slot] != 0xff) {
+		qtest_pci_outb(s, bus, device, 0,
+				pcislot2regaddr[virtio_net_slot],
+				VIRTIO_NET_IRQ_NUM);
+	}
+
+	return 0;
+}
+
+/*
+ * Common initialization of PCI device.
+ * To know detail, see pci specification.
+ */
+static int
+qtest_init_pci_device(struct qtest_session *s, struct qtest_pci_device *dev)
+{
+	uint8_t i, bus, device;
+	uint32_t val;
+	uint64_t val64;
+
+	bus = dev->bus_addr;
+	device = dev->device_addr;
+
+	PMD_DRV_LOG(INFO,
+		"Find %s on virtual PCI bus: %04x:%02x:00.0\n",
+		dev->name, bus, device);
+
+	/* Check header type */
+	val = qtest_pci_inb(s, bus, device, 0, PCI_HEADER_TYPE);
+	if (val != PCI_HEADER_TYPE_NORMAL) {
+		PMD_DRV_LOG(ERR, "Unexpected header type %d\n", val);
+		return -1;
+	}
+
+	/* Check BAR type */
+	for (i = 0; i < NB_BAR; i++) {
+		val = qtest_pci_inl(s, bus, device, 0, dev->bar[i].addr);
+
+		switch (dev->bar[i].type) {
+		case QTEST_PCI_BAR_IO:
+			if ((val & 0x1) != PCI_BASE_ADDRESS_SPACE_IO)
+				dev->bar[i].type = QTEST_PCI_BAR_DISABLE;
+			break;
+		case QTEST_PCI_BAR_MEMORY_UNDER_1MB:
+			if ((val & 0x1) != PCI_BASE_ADDRESS_SPACE_MEMORY)
+				dev->bar[i].type = QTEST_PCI_BAR_DISABLE;
+			if ((val & 0x6) != PCI_BASE_ADDRESS_MEM_TYPE_1M)
+				dev->bar[i].type = QTEST_PCI_BAR_DISABLE;
+			break;
+		case QTEST_PCI_BAR_MEMORY_32:
+			if ((val & 0x1) != PCI_BASE_ADDRESS_SPACE_MEMORY)
+				dev->bar[i].type = QTEST_PCI_BAR_DISABLE;
+			if ((val & 0x6) != PCI_BASE_ADDRESS_MEM_TYPE_32)
+				dev->bar[i].type = QTEST_PCI_BAR_DISABLE;
+			break;
+		case QTEST_PCI_BAR_MEMORY_64:
+
+			if ((val & 0x1) != PCI_BASE_ADDRESS_SPACE_MEMORY)
+				dev->bar[i].type = QTEST_PCI_BAR_DISABLE;
+			if ((val & 0x6) != PCI_BASE_ADDRESS_MEM_TYPE_64)
+				dev->bar[i].type = QTEST_PCI_BAR_DISABLE;
+			break;
+		case QTEST_PCI_BAR_DISABLE:
+			break;
+		}
+	}
+
+	/* Enable device */
+	val = qtest_pci_inl(s, bus, device, 0, PCI_COMMAND);
+	val |= PCI_COMMAND_IO | PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER;
+	qtest_pci_outl(s, bus, device, 0, PCI_COMMAND, val);
+
+	/* Calculate BAR size */
+	for (i = 0; i < NB_BAR; i++) {
+		switch (dev->bar[i].type) {
+		case QTEST_PCI_BAR_IO:
+		case QTEST_PCI_BAR_MEMORY_UNDER_1MB:
+		case QTEST_PCI_BAR_MEMORY_32:
+			qtest_pci_outl(s, bus, device, 0,
+					dev->bar[i].addr, 0xffffffff);
+			val = qtest_pci_inl(s, bus, device,
+					0, dev->bar[i].addr);
+			dev->bar[i].region_size = ~(val & 0xfffffff0) + 1;
+			break;
+		case QTEST_PCI_BAR_MEMORY_64:
+			qtest_pci_outq(s, bus, device, 0,
+					dev->bar[i].addr, 0xffffffffffffffff);
+			val64 = qtest_pci_inq(s, bus, device,
+					0, dev->bar[i].addr);
+			dev->bar[i].region_size =
+					~(val64 & 0xfffffffffffffff0) + 1;
+			break;
+		case QTEST_PCI_BAR_DISABLE:
+			break;
+		}
+	}
+
+	/* Set BAR region */
+	for (i = 0; i < NB_BAR; i++) {
+		switch (dev->bar[i].type) {
+		case QTEST_PCI_BAR_IO:
+		case QTEST_PCI_BAR_MEMORY_UNDER_1MB:
+		case QTEST_PCI_BAR_MEMORY_32:
+			qtest_pci_outl(s, bus, device, 0, dev->bar[i].addr,
+				dev->bar[i].region_start);
+			PMD_DRV_LOG(INFO, "Set BAR of %s device: 0x%lx - 0x%lx\n",
+				dev->name, dev->bar[i].region_start,
+				dev->bar[i].region_start + dev->bar[i].region_size);
+			break;
+		case QTEST_PCI_BAR_MEMORY_64:
+			qtest_pci_outq(s, bus, device, 0, dev->bar[i].addr,
+				dev->bar[i].region_start);
+			PMD_DRV_LOG(INFO, "Set BAR of %s device: 0x%lx - 0x%lx\n",
+				dev->name, dev->bar[i].region_start,
+				dev->bar[i].region_start + dev->bar[i].region_size);
+			break;
+		case QTEST_PCI_BAR_DISABLE:
+			break;
+		}
+	}
+
+	return 0;
+}
+
+static int
+qtest_find_pci_device(struct qtest_session *s, const char *name,
+			struct rte_pci_addr *addr)
+{
+	struct qtest_pci_device *dev;
+	uint32_t val;
+
+	PMD_DRV_LOG(INFO, "PCI address of %s is %04x:%02x:%02x.%02x\n", name,
+			addr->domain, addr->bus, addr->devid, addr->function);
+
+	dev = qtest_find_device(s, name);
+	if (dev == NULL)
+		goto error;
+
+	val = qtest_pci_inl(s, addr->bus, addr->devid, addr->function, 0);
+	if (val == ((uint32_t)dev->device_id << 16 | dev->vendor_id)) {
+		dev->bus_addr = addr->bus;
+		dev->device_addr = addr->devid;
+		return 0;
+	}
+
+error:
+	PMD_DRV_LOG(ERR, "%s isn' found on %04x:%02x:%02x.%02x\n", name,
+			addr->domain, addr->bus, addr->devid, addr->function);
+	return -1;
+}
+
+static int
+qtest_init_pci_devices(struct qtest_session *s,
+			struct rte_pci_addr *virtio_addr,
+			struct rte_pci_addr *ivshmem_addr,
+			struct rte_pci_addr *piix3_addr)
+{
+	struct qtest_pci_device *dev;
+	int ret;
+
+
+	/* Try to find devices */
+	ret = qtest_find_pci_device(s, "virtio-net", virtio_addr);
+	if (ret < 0)
+		return -1;
+
+	ret = qtest_find_pci_device(s, "ivshmem", ivshmem_addr);
+	if (ret < 0)
+		return -1;
+
+	ret = qtest_find_pci_device(s, "piix3", piix3_addr);
+	if (ret < 0)
+		return -1;
+
+	/* Initialize devices */
+	TAILQ_FOREACH(dev, &s->head, next) {
+		ret = dev->init(s, dev);
+		if (ret != 0)
+			return ret;
+	}
+
+	return 0;
+}
+
+struct rte_pci_id
+qtest_get_pci_id_of_virtio_net(void)
+{
+	struct rte_pci_id id =  {VIRTIO_NET_DEVICE_ID,
+		VIRTIO_NET_VENDOR_ID, PCI_ANY_ID, PCI_ANY_ID};
+
+	return id;
+}
+
+static int
+qtest_register_target_devices(struct qtest_session *s)
+{
+	struct qtest_pci_device *virtio_net, *ivshmem, *piix3;
+	const struct rte_memseg *ms;
+
+	ms = rte_eal_get_physmem_layout();
+	/* if EAL memory size isn't pow of 2, ivshmem will refuse it */
+	if ((ms[0].len & (ms[0].len - 1)) != 0) {
+		PMD_DRV_LOG(ERR, "memory size must be power of 2\n");
+		return -1;
+	}
+
+	virtio_net = malloc(sizeof(*virtio_net));
+	if (virtio_net == NULL)
+		return -1;
+
+	ivshmem = malloc(sizeof(*ivshmem));
+	if (ivshmem == NULL)
+		return -1;
+
+	piix3 = malloc(sizeof(*piix3));
+	if (piix3 == NULL)
+		return -1;
+
+	memset(virtio_net, 0, sizeof(*virtio_net));
+	memset(ivshmem, 0, sizeof(*ivshmem));
+
+	TAILQ_INIT(&s->head);
+
+	virtio_net->name = "virtio-net";
+	virtio_net->device_id = VIRTIO_NET_DEVICE_ID;
+	virtio_net->vendor_id = VIRTIO_NET_VENDOR_ID;
+	virtio_net->init = qtest_init_pci_device;
+	virtio_net->bar[0].addr = PCI_BASE_ADDRESS_0;
+	virtio_net->bar[0].type = QTEST_PCI_BAR_IO;
+	virtio_net->bar[0].region_start = VIRTIO_NET_IO_START;
+	virtio_net->bar[1].addr = PCI_BASE_ADDRESS_1;
+	virtio_net->bar[1].type = QTEST_PCI_BAR_MEMORY_32;
+	virtio_net->bar[1].region_start = VIRTIO_NET_MEMORY1_START;
+	virtio_net->bar[4].addr = PCI_BASE_ADDRESS_4;
+	virtio_net->bar[4].type = QTEST_PCI_BAR_MEMORY_64;
+	virtio_net->bar[4].region_start = VIRTIO_NET_MEMORY2_START;
+	TAILQ_INSERT_TAIL(&s->head, virtio_net, next);
+
+	ivshmem->name = "ivshmem";
+	ivshmem->device_id = IVSHMEM_DEVICE_ID;
+	ivshmem->vendor_id = IVSHMEM_VENDOR_ID;
+	ivshmem->init = qtest_init_pci_device;
+	ivshmem->bar[0].addr = PCI_BASE_ADDRESS_0;
+	ivshmem->bar[0].type = QTEST_PCI_BAR_MEMORY_32;
+	ivshmem->bar[0].region_start = IVSHMEM_MEMORY_START;
+	ivshmem->bar[2].addr = PCI_BASE_ADDRESS_2;
+	ivshmem->bar[2].type = QTEST_PCI_BAR_MEMORY_64;
+	/* In host mode, only one memory segment is vaild */
+	ivshmem->bar[2].region_start = (uint64_t)ms[0].addr;
+	TAILQ_INSERT_TAIL(&s->head, ivshmem, next);
+
+	/* piix3 is needed to route irqs from virtio-net to ioapic */
+	piix3->name = "piix3";
+	piix3->device_id = PIIX3_DEVICE_ID;
+	piix3->vendor_id = PIIX3_VENDOR_ID;
+	piix3->init = qtest_init_piix3_device;
+	TAILQ_INSERT_TAIL(&s->head, piix3, next);
+
+	return 0;
+}
+
+static int
+qtest_send_message_to_ivshmem(int sock_fd, uint64_t client_id, int shm_fd)
+{
+	struct iovec iov;
+	struct msghdr msgh;
+	size_t fdsize = sizeof(int);
+	char control[CMSG_SPACE(fdsize)];
+	struct cmsghdr *cmsg;
+	int ret;
+
+	memset(&msgh, 0, sizeof(msgh));
+	iov.iov_base = &client_id;
+	iov.iov_len = sizeof(client_id);
+
+	msgh.msg_iov = &iov;
+	msgh.msg_iovlen = 1;
+
+	if (shm_fd >= 0) {
+		msgh.msg_control = &control;
+		msgh.msg_controllen = sizeof(control);
+		cmsg = CMSG_FIRSTHDR(&msgh);
+		cmsg->cmsg_len = CMSG_LEN(fdsize);
+		cmsg->cmsg_level = SOL_SOCKET;
+		cmsg->cmsg_type = SCM_RIGHTS;
+		memcpy(CMSG_DATA(cmsg), &shm_fd, fdsize);
+	}
+
+	do {
+		ret = sendmsg(sock_fd, &msgh, 0);
+	} while (ret < 0 && errno == EINTR);
+
+	if (ret < 0) {
+		PMD_DRV_LOG(ERR, "sendmsg error\n");
+		return ret;
+	}
+
+	return ret;
+}
+
+static int
+qtest_setup_shared_memory(struct qtest_session *s)
+{
+	int shm_fd, num, ret;
+	struct back_file *huges;
+
+	num = rte_eal_get_backfile_info(&huges);
+	if (num != 1) {
+		PMD_DRV_LOG(ERR,
+			"Not supported memory configuration\n");
+		return -1;
+	}
+
+	shm_fd = open(huges[0].filepath, O_RDWR);
+	if (shm_fd < 0) {
+		PMD_DRV_LOG(ERR,
+			"Cannot open file: %s\n", huges[0].filepath);
+		return -1;
+	}
+
+	/* send our protocol version first */
+	ret = qtest_send_message_to_ivshmem(s->ivshmem_socket,
+			IVSHMEM_PROTOCOL_VERSION, -1);
+	if (ret < 0) {
+		PMD_DRV_LOG(ERR,
+			"Failed to send protocol version to ivshmem\n");
+		return -1;
+	}
+
+	/* send client id */
+	ret = qtest_send_message_to_ivshmem(s->ivshmem_socket, 0, -1);
+	if (ret < 0) {
+		PMD_DRV_LOG(ERR, "Failed to send VMID to ivshmem\n");
+		return -1;
+	}
+
+	/* send message to ivshmem */
+	ret = qtest_send_message_to_ivshmem(s->ivshmem_socket, -1, shm_fd);
+	if (ret < 0) {
+		PMD_DRV_LOG(ERR, "Failed to file descriptor to ivshmem\n");
+		return -1;
+	}
+
+	close(shm_fd);
+
+	return 0;
+}
+
+int
+qtest_vdev_init(struct rte_eth_dev_data *data,
+		int qtest_socket, int ivshmem_socket,
+		struct rte_pci_addr *virtio_addr,
+		struct rte_pci_addr *ivshmem_addr,
+		struct rte_pci_addr *piix3_addr)
+{
+	struct virtio_hw *hw = ((struct rte_eth_dev_data *)data)->dev_private;
+	struct qtest_session *s;
+	int ret;
+
+	s = rte_zmalloc(NULL, sizeof(*s), RTE_CACHE_LINE_SIZE);
+
+	ret = pipe(s->msgfds.pipefd);
+	if (ret != 0) {
+		PMD_DRV_LOG(ERR, "Failed to initialize message pipe\n");
+		return -1;
+	}
+
+	s->eventfd = eventfd(0, 0);
+	if (s->eventfd < 0) {
+		PMD_DRV_LOG(ERR, "Failed to open eventfd\n");
+		return -1;
+	}
+
+	ret = qtest_register_target_devices(s);
+	if (ret != 0) {
+		PMD_DRV_LOG(ERR, "Failed to initialize qtest session\n");
+		return -1;
+	}
+
+	ret = pthread_mutex_init(&s->qtest_session_lock, NULL);
+	if (ret != 0) {
+		PMD_DRV_LOG(ERR, "Failed to initialize mutex\n");
+		return -1;
+	}
+
+	rte_atomic16_set(&s->enable_intr, 0);
+	s->qtest_socket = qtest_socket;
+	s->ivshmem_socket = ivshmem_socket;
+	s->eth_data = data;
+	hw->qsession = (void *)s;
+
+	ret = pthread_create(&s->event_th, NULL, qtest_event_handler, s);
+	if (ret != 0) {
+		PMD_DRV_LOG(ERR, "Failed to create event handler\n");
+		return -1;
+	}
+
+	ret = pthread_create(&s->intr_th, NULL, qtest_intr_handler, s);
+	if (ret != 0) {
+		PMD_DRV_LOG(ERR, "Failed to create interrupt handler\n");
+		return -1;
+	}
+
+	ret = qtest_intr_initialize(data);
+	if (ret != 0) {
+		PMD_DRV_LOG(ERR, "Failed to initialize interrupt\n");
+		return -1;
+	}
+
+	ret = qtest_setup_shared_memory(s);
+	if (ret != 0) {
+		PMD_DRV_LOG(ERR, "Failed to setup shared memory\n");
+		return -1;
+	}
+
+	ret = qtest_init_pci_devices(s, virtio_addr, ivshmem_addr, piix3_addr);
+	if (ret != 0) {
+		PMD_DRV_LOG(ERR, "Failed to initialize devices\n");
+		return -1;
+	}
+
+	return 0;
+}
+
+static void
+qtest_remove_target_devices(struct qtest_session *s)
+{
+	struct qtest_pci_device *dev, *next;
+
+	for (dev = TAILQ_FIRST(&s->head); dev != NULL; dev = next) {
+		next = TAILQ_NEXT(dev, next);
+		TAILQ_REMOVE(&s->head, dev, next);
+		free(dev);
+	}
+}
+
+void
+qtest_vdev_uninit(struct rte_eth_dev_data *data)
+{
+	struct virtio_hw *hw = ((struct rte_eth_dev_data *)data)->dev_private;
+	struct qtest_session *s;
+
+	s = (struct qtest_session *)hw->qsession;
+
+	qtest_close_sockets(s);
+	qtest_event_flush(s);
+
+	pthread_cancel(s->event_th);
+	pthread_join(s->event_th, NULL);
+
+	pthread_cancel(s->intr_th);
+	pthread_join(s->intr_th, NULL);
+
+	pthread_mutex_destroy(&s->qtest_session_lock);
+
+	qtest_remove_target_devices(s);
+
+	rte_free(s);
+	hw->qsession = NULL;
+}
diff --git a/drivers/net/virtio/qtest.h b/drivers/net/virtio/qtest.h
new file mode 100644
index 0000000..965e985
--- /dev/null
+++ b/drivers/net/virtio/qtest.h
@@ -0,0 +1,65 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2016 IGEL Co., Ltd. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of IGEL Co., Ltd. nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _VIRTIO_QTEST_H_
+#define _VIRTIO_QTEST_H_
+
+#include "virtio_pci.h"
+
+#define QTEST_DRV_NAME		"eth_qtest_virtio"
+
+int qtest_vdev_init(struct rte_eth_dev_data *data, int qtest_socket,
+		int ivshmem_socket, struct rte_pci_addr *virtio_addr,
+		struct rte_pci_addr *ivshmem_addr,
+		struct rte_pci_addr *piix3_addr);
+void qtest_vdev_uninit(struct rte_eth_dev_data *data);
+void qtest_intr_callback_register(void *data,
+		rte_intr_callback_fn cb, void *cb_arg);
+void qtest_intr_callback_unregister(void *data,
+		rte_intr_callback_fn cb, void *cb_arg);
+int qtest_intr_enable(void *data);
+int qtest_intr_disable(void *data);
+struct rte_pci_id qtest_get_pci_id_of_virtio_net(void);
+
+uint32_t qtest_in(struct virtio_hw *, uint16_t, char type);
+void qtest_out(struct virtio_hw *, uint16_t, uint64_t, char type);
+uint32_t qtest_read(struct virtio_hw *, uint64_t, char type);
+void qtest_write(struct virtio_hw *, uint64_t, uint64_t, char type);
+int qtest_read_pci_cfg(struct virtio_hw *hw, const char *name,
+		void *buf, size_t len, off_t offset);
+int qtest_get_bar_addr(struct virtio_hw *hw, const char *name,
+		uint8_t bar, uint64_t **addr);
+int qtest_get_bar_size(struct virtio_hw *hw, const char *name,
+		uint8_t bar, uint64_t *size);
+
+#endif /* _VIRTIO_QTEST_H_ */
diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c
index bc631c7..2bf0bdf 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -36,6 +36,10 @@
 #include <stdio.h>
 #include <errno.h>
 #include <unistd.h>
+#ifdef RTE_VIRTIO_VDEV_QTEST
+#include <sys/socket.h>
+#include <sys/un.h>
+#endif
 
 #include <rte_ethdev.h>
 #include <rte_memcpy.h>
@@ -60,6 +64,9 @@
 #include "virtqueue.h"
 #include "virtio_rxtx.h"
 
+#ifdef RTE_VIRTIO_VDEV_QTEST
+#include "qtest.h"
+#endif
 
 static int eth_virtio_dev_init(struct rte_eth_dev *eth_dev);
 static int eth_virtio_dev_uninit(struct rte_eth_dev *eth_dev);
@@ -387,7 +394,7 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev,
 			return -ENOMEM;
 		}
 	}
-#ifdef RTE_VIRTIO_VDEV
+#if defined(RTE_VIRTIO_VDEV) || defined(RTE_VIRTIO_VDEV_QTEST)
 	else
 		vq->vq_ring_mem = (phys_addr_t)mz->addr; /* Use vaddr!!! */
 #endif
@@ -431,7 +438,7 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev,
 
 		if (virtio_dev_check(dev, RTE_ETH_DEV_PCI, NULL, 0))
 			vq->virtio_net_hdr_mem = mz->phys_addr;
-#ifdef RTE_VIRTIO_VDEV
+#if defined(RTE_VIRTIO_VDEV) || defined(RTE_VIRTIO_VDEV_QTEST)
 		else
 			vq->virtio_net_hdr_mem = (phys_addr_t)mz->addr;
 #endif
@@ -441,7 +448,7 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev,
 
 	if (virtio_dev_check(dev, RTE_ETH_DEV_PCI, NULL, 0))
 		vq->offset = offsetof(struct rte_mbuf, buf_physaddr);
-#ifdef RTE_VIRTIO_VDEV
+#if defined(RTE_VIRTIO_VDEV) || defined(RTE_VIRTIO_VDEV_QTEST)
 	else
 		vq->offset = offsetof(struct rte_mbuf, buf_addr);
 #endif
@@ -999,6 +1006,23 @@ virtio_interrupt_handler(__rte_unused struct rte_intr_handle *handle,
 	isr = vtpci_isr(hw);
 	PMD_DRV_LOG(INFO, "interrupt status = %#x", isr);
 
+#ifdef RTE_VIRTIO_VDEV_QTEST
+	if (virtio_dev_check(dev, RTE_ETH_DEV_VIRTUAL, QTEST_DRV_NAME, 0)) {
+		if (qtest_intr_enable(dev->data) < 0)
+			PMD_DRV_LOG(ERR, "interrupt enable failed");
+		/*
+		 * If last qtest message is interrupt, 'isr' will be 0
+		 * becasue socket has been closed already.
+		 * But still we want to notice this event to EAL.
+		 * So just ignore isr value.
+		 */
+		if (virtio_dev_link_update(dev, 0) == 0)
+			_rte_eth_dev_callback_process(dev,
+					RTE_ETH_EVENT_INTR_LSC);
+		return;
+	}
+#endif
+
 	if (virtio_dev_check(dev, RTE_ETH_DEV_PCI, NULL, 0))
 		if (rte_intr_enable(&dev->pci_dev->intr_handle) < 0)
 			PMD_DRV_LOG(ERR, "interrupt enable failed");
@@ -1055,9 +1079,16 @@ eth_virtio_dev_init(struct rte_eth_dev *eth_dev)
 	pci_dev = eth_dev->pci_dev;
 
 	if (virtio_dev_check(eth_dev, RTE_ETH_DEV_PCI, NULL, 0)) {
-		if (vtpci_init(pci_dev, hw) < 0)
+		if (vtpci_init(eth_dev, hw) < 0)
 			return -1;
 	}
+#ifdef RTE_VIRTIO_VDEV_QTEST
+	else if (virtio_dev_check(eth_dev,
+				RTE_ETH_DEV_VIRTUAL, QTEST_DRV_NAME, 0)) {
+		if (vtpci_init(eth_dev, hw) < 0)
+			return -1;
+	}
+#endif
 
 	/* Reset the device although not necessary at startup */
 	vtpci_reset(hw);
@@ -1077,6 +1108,13 @@ eth_virtio_dev_init(struct rte_eth_dev *eth_dev)
 
 		rte_eth_copy_pci_info(eth_dev, pci_dev);
 	}
+#ifdef RTE_VIRTIO_VDEV_QTEST
+	else if (virtio_dev_check(eth_dev,
+				RTE_ETH_DEV_VIRTUAL, QTEST_DRV_NAME, 0)) {
+		if (!vtpci_with_feature(hw, VIRTIO_NET_F_STATUS))
+			pci_dev->driver->drv_flags &= ~RTE_PCI_DRV_INTR_LSC;
+	}
+#endif
 
 	rx_func_get(eth_dev);
 
@@ -1165,6 +1203,26 @@ eth_virtio_dev_init(struct rte_eth_dev *eth_dev)
 						   virtio_interrupt_handler,
 						   eth_dev);
 	}
+#ifdef RTE_VIRTIO_VDEV_QTEST
+	else if (virtio_dev_check(eth_dev, RTE_ETH_DEV_VIRTUAL,
+				QTEST_DRV_NAME, 0)) {
+		struct rte_pci_id id;
+
+		id = qtest_get_pci_id_of_virtio_net();
+		RTE_SET_USED(id);
+
+		PMD_INIT_LOG(DEBUG, "port %d vendorID=0x%x deviceID=0x%x",
+				eth_dev->data->port_id,
+				id.vendor_id, id.device_id);
+
+		/* Setup interrupt callback  */
+		if (virtio_dev_check(eth_dev, RTE_ETH_DEV_VIRTUAL,
+					NULL, RTE_ETH_DEV_INTR_LSC))
+			qtest_intr_callback_register(eth_dev->data,
+					virtio_interrupt_handler, eth_dev);
+	}
+#endif
+
 	virtio_dev_cq_start(eth_dev);
 
 	return 0;
@@ -1202,7 +1260,15 @@ eth_virtio_dev_uninit(struct rte_eth_dev *eth_dev)
 					     virtio_interrupt_handler,
 					     eth_dev);
 
-	rte_eal_pci_unmap_device(pci_dev);
+#ifdef RTE_VIRTIO_VDEV_QTEST
+	else if (virtio_dev_check(eth_dev, RTE_ETH_DEV_VIRTUAL,
+				QTEST_DRV_NAME, RTE_ETH_DEV_INTR_LSC))
+		qtest_intr_callback_unregister(eth_dev->data,
+				virtio_interrupt_handler, eth_dev);
+#endif
+
+	if (virtio_dev_check(eth_dev, RTE_ETH_DEV_PCI, NULL, 0))
+		rte_eal_pci_unmap_device(pci_dev);
 
 	PMD_INIT_LOG(DEBUG, "dev_uninit completed");
 
@@ -1284,16 +1350,34 @@ virtio_dev_start(struct rte_eth_dev *dev)
 
 	/* check if lsc interrupt feature is enabled */
 	if (dev->data->dev_conf.intr_conf.lsc) {
-		if (!virtio_dev_check(dev, RTE_ETH_DEV_PCI,
-					NULL, RTE_PCI_DRV_INTR_LSC)) {
+		int pdev_has_lsc = 0, vdev_has_lsc = 0;
+
+		pdev_has_lsc = virtio_dev_check(dev, RTE_ETH_DEV_PCI,
+				NULL, RTE_PCI_DRV_INTR_LSC);
+#ifdef RTE_VIRTIO_VDEV_QTEST
+		vdev_has_lsc = virtio_dev_check(dev, RTE_ETH_DEV_VIRTUAL,
+				QTEST_DRV_NAME, RTE_ETH_DEV_INTR_LSC);
+#endif
+
+		if ((!pdev_has_lsc) && (!vdev_has_lsc)) {
 			PMD_DRV_LOG(ERR, "link status not supported by host");
 			return -ENOTSUP;
 		}
 
-		if (rte_intr_enable(&dev->pci_dev->intr_handle) < 0) {
-			PMD_DRV_LOG(ERR, "interrupt enable failed");
-			return -EIO;
+		if (pdev_has_lsc) {
+			if (rte_intr_enable(&dev->pci_dev->intr_handle) < 0) {
+				PMD_DRV_LOG(ERR, "interrupt enable failed");
+				return -EIO;
+			}
 		}
+#ifdef RTE_VIRTIO_VDEV_QTEST
+		else if (vdev_has_lsc) {
+			if (qtest_intr_enable(dev->data) < 0) {
+				PMD_DRV_LOG(ERR, "interrupt enable failed");
+				return -EIO;
+			}
+		}
+#endif
 	}
 
 	/* Initialize Link state */
@@ -1390,8 +1474,15 @@ virtio_dev_stop(struct rte_eth_dev *dev)
 
 	PMD_INIT_LOG(DEBUG, "stop");
 
-	if (dev->data->dev_conf.intr_conf.lsc)
-		rte_intr_disable(&dev->pci_dev->intr_handle);
+	if (dev->data->dev_conf.intr_conf.lsc) {
+		if (virtio_dev_check(dev, RTE_ETH_DEV_PCI, NULL, 0))
+			rte_intr_disable(&dev->pci_dev->intr_handle);
+#ifdef RTE_VIRTIO_VDEV_QTEST
+		else if (virtio_dev_check(dev, RTE_ETH_DEV_VIRTUAL,
+					QTEST_DRV_NAME, 0))
+			qtest_intr_disable(dev->data);
+#endif
+	}
 
 	memset(&link, 0, sizeof(link));
 	virtio_dev_atomic_write_link_status(dev, &link);
@@ -1628,3 +1719,271 @@ static struct rte_driver rte_cvio_driver = {
 PMD_REGISTER_DRIVER(rte_cvio_driver);
 
 #endif
+
+#ifdef RTE_VIRTIO_VDEV_QTEST
+
+#define ETH_VIRTIO_NET_ARG_QTEST_PATH           "qtest"
+#define ETH_VIRTIO_NET_ARG_IVSHMEM_PATH         "ivshmem"
+#define ETH_VIRTIO_NET_ARG_VIRTIO_NET_ADDR      "virtio-net-addr"
+#define ETH_VIRTIO_NET_ARG_IVSHMEM_ADDR         "ivshmem-addr"
+#define ETH_VIRTIO_NET_ARG_PIIX3_ADDR           "piix3-addr"
+
+static const char *valid_qtest_args[] = {
+       ETH_VIRTIO_NET_ARG_QTEST_PATH,
+       ETH_VIRTIO_NET_ARG_IVSHMEM_PATH,
+       ETH_VIRTIO_NET_ARG_VIRTIO_NET_ADDR,
+       ETH_VIRTIO_NET_ARG_IVSHMEM_ADDR,
+       ETH_VIRTIO_NET_ARG_PIIX3_ADDR,
+       NULL
+};
+
+static int
+get_socket_path_arg(const char *key __rte_unused,
+               const char *value, void *extra_args)
+{
+       int ret, fd, loop = 100;
+       int *pfd = extra_args;
+       struct sockaddr_un sa = {0};
+
+       if ((value == NULL) || (extra_args == NULL))
+               return -EINVAL;
+
+       fd = socket(AF_UNIX, SOCK_STREAM, 0);
+       if (fd < 0)
+               return -1;
+
+       sa.sun_family = AF_UNIX;
+       strncpy(sa.sun_path, value, sizeof(sa.sun_path));
+
+       while (loop--) {
+               /*
+                * may need to wait for qtest and ivshmem
+                * sockets are prepared by QEMU.
+                */
+               ret = connect(fd, (struct sockaddr *)&sa,
+                               sizeof(struct sockaddr_un));
+               if (ret == 0)
+                       break;
+               else
+                       usleep(100000);
+       }
+
+       if (ret != 0) {
+               close(fd);
+               return -1;
+       }
+
+       *pfd = fd;
+
+       return 0;
+}
+
+static int
+get_pci_addr_arg(const char *key __rte_unused,
+               const char *value, void *extra_args)
+{
+       struct rte_pci_addr *addr = extra_args;
+
+       if ((value == NULL) || (extra_args == NULL))
+               return -EINVAL;
+
+       if (eal_parse_pci_DomBDF(value, addr) != 0)
+               return -1;
+
+       if (addr->domain != 0)
+               return -1;
+
+       return 0;
+}
+
+static struct rte_eth_dev *
+virtio_net_eth_dev_alloc(const char *name)
+{
+       struct rte_eth_dev *eth_dev;
+       struct rte_eth_dev_data *data;
+       struct virtio_hw *hw;
+
+       eth_dev = rte_eth_dev_allocate(name, RTE_ETH_DEV_VIRTUAL);
+       if (eth_dev == NULL)
+               rte_panic("cannot alloc rte_eth_dev\n");
+
+       data = eth_dev->data;
+
+       hw = rte_zmalloc(NULL, sizeof(*hw), 0);
+       if (!hw)
+               rte_panic("malloc virtio_hw failed\n");
+
+       data->dev_private = hw;
+       eth_dev->driver = &rte_virtio_pmd;
+       return eth_dev;
+}
+
+static int
+virtio_net_eth_pmd_parse_socket_path(struct rte_kvargs *kvlist,
+               const char *option, int *socket)
+{
+       int ret;
+
+       if (rte_kvargs_count(kvlist, option) == 1) {
+               ret = rte_kvargs_process(kvlist, option,
+                               &get_socket_path_arg, socket);
+               if (ret != 0) {
+                       PMD_INIT_LOG(ERR,
+                               "Failed to connect to %s socket", option);
+                       return -1;
+               }
+       } else {
+               PMD_INIT_LOG(ERR, "No argument specified for %s", option);
+               return -1;
+       }
+
+       return 0;
+}
+
+static int
+virtio_net_eth_pmd_parse_pci_addr(struct rte_kvargs *kvlist,
+               const char *option, struct rte_pci_addr *addr,
+               struct rte_pci_addr *default_addr)
+{
+       int ret;
+
+       if (rte_kvargs_count(kvlist, option) == 1) {
+               ret = rte_kvargs_process(kvlist, option,
+                               &get_pci_addr_arg, addr);
+               if (ret != 0) {
+                       PMD_INIT_LOG(ERR,
+                               "Specified invalid address in '%s'", option);
+                       return -1;
+               }
+       } else
+               *addr = *default_addr;
+
+       return 0;
+}
+
+/*
+ * Initialization when "CONFIG_RTE_VIRTIO_VDEV_QTEST" is enabled.
+ */
+static int
+rte_qtest_virtio_pmd_init(const char *name, const char *params)
+{
+       struct rte_kvargs *kvlist = NULL;
+       struct rte_eth_dev *eth_dev = NULL;
+       int ret, qtest_sock = 0, ivshmem_sock = 0;
+       struct rte_pci_addr virtio_addr, ivshmem_addr, piix3_addr, default_addr;
+
+       if (params == NULL || params[0] == '\0')
+               goto error;
+
+       kvlist = rte_kvargs_parse(params, valid_qtest_args);
+       if (!kvlist) {
+               PMD_INIT_LOG(ERR, "error when parsing param");
+               return -EFAULT;
+       }
+
+       ret = virtio_net_eth_pmd_parse_socket_path(kvlist,
+                       ETH_VIRTIO_NET_ARG_IVSHMEM_PATH, &ivshmem_sock);
+       if (ret < 0)
+               goto error;
+
+       ret = virtio_net_eth_pmd_parse_socket_path(kvlist,
+                       ETH_VIRTIO_NET_ARG_QTEST_PATH, &qtest_sock);
+       if (ret < 0)
+               goto error;
+
+       default_addr.domain = 0;
+       default_addr.bus = 0;
+       default_addr.function = 0;
+
+       default_addr.devid = 3;
+       ret = virtio_net_eth_pmd_parse_pci_addr(kvlist,
+                       ETH_VIRTIO_NET_ARG_VIRTIO_NET_ADDR,
+                       &virtio_addr, &default_addr);
+       if (ret < 0)
+               goto error;
+
+       default_addr.devid = 4;
+       ret = virtio_net_eth_pmd_parse_pci_addr(kvlist,
+                       ETH_VIRTIO_NET_ARG_IVSHMEM_ADDR,
+                       &ivshmem_addr, &default_addr);
+       if (ret < 0)
+               goto error;
+
+       default_addr.devid = 1;
+       ret = virtio_net_eth_pmd_parse_pci_addr(kvlist,
+                       ETH_VIRTIO_NET_ARG_PIIX3_ADDR,
+                       &piix3_addr, &default_addr);
+       if (ret < 0)
+               goto error;
+
+       eth_dev = virtio_net_eth_dev_alloc(name);
+       if (eth_dev == NULL)
+               goto error;
+
+       ret = qtest_vdev_init(eth_dev->data, qtest_sock, ivshmem_sock,
+                       &virtio_addr, &ivshmem_addr, &piix3_addr);
+       if (ret < 0)
+               goto error;
+
+       /* originally, this will be called in rte_eal_pci_probe() */
+       ret = eth_virtio_dev_init(eth_dev);
+       if (ret < 0)
+               goto error;
+
+       eth_dev->driver = NULL;
+       eth_dev->data->dev_flags |= RTE_ETH_DEV_DETACHABLE;
+       eth_dev->data->kdrv = RTE_KDRV_NONE;
+       eth_dev->data->drv_name = QTEST_DRV_NAME;
+
+       rte_kvargs_free(kvlist);
+       return 0;
+
+error:
+       if (qtest_sock)
+               close (qtest_sock);
+       if (ivshmem_sock)
+               close (ivshmem_sock);
+       rte_kvargs_free(kvlist);
+       return -EFAULT;
+}
+
+/*
+ * Finalization when "CONFIG_RTE_VIRTIO_VDEV_QTEST" is enabled.
+ */
+static int
+rte_qtest_virtio_pmd_uninit(const char *name)
+{
+       struct rte_eth_dev *eth_dev = NULL;
+       int ret;
+
+       if (name == NULL)
+               return -EINVAL;
+
+       /* find the ethdev entry */
+       eth_dev = rte_eth_dev_allocated(name);
+       if (eth_dev == NULL)
+               return -ENODEV;
+
+       ret = eth_virtio_dev_uninit(eth_dev);
+       if (ret != 0)
+               return -EFAULT;
+
+       qtest_vdev_uninit(eth_dev->data);
+       rte_free(eth_dev->data->dev_private);
+
+       ret = rte_eth_dev_release_port(eth_dev);
+       if (ret != 0)
+               return -EFAULT;
+
+       return 0;
+}
+
+static struct rte_driver rte_qtest_virtio_driver = {
+       .name   = QTEST_DRV_NAME,
+       .type   = PMD_VDEV,
+       .init   = rte_qtest_virtio_pmd_init,
+       .uninit = rte_qtest_virtio_pmd_uninit,
+};
+
+PMD_REGISTER_DRIVER(rte_qtest_virtio_driver);
+#endif /* RTE_VIRTIO_VDEV_QTEST */
diff --git a/drivers/net/virtio/virtio_pci.c b/drivers/net/virtio/virtio_pci.c
index 85fbe88..c235102 100644
--- a/drivers/net/virtio/virtio_pci.c
+++ b/drivers/net/virtio/virtio_pci.c
@@ -37,10 +37,15 @@
  #include <fcntl.h>
 #endif
 
+#include "virtio_ethdev.h"
 #include "virtio_pci.h"
 #include "virtio_logs.h"
 #include "virtqueue.h"
 
+#ifdef RTE_VIRTIO_VDEV_QTEST
+#include "qtest.h"
+#endif
+
 /*
  * Following macros are derived from linux/pci_regs.h, however,
  * we can't simply include that header here, as there is no such
@@ -440,6 +445,220 @@ static const struct virtio_pci_ops modern_ops = {
 };
 
 
+#ifdef RTE_VIRTIO_VDEV_QTEST
+static inline uint8_t
+qtest_read8(struct virtio_hw *hw, uint8_t *addr)
+{
+	return qtest_read(hw, (uint64_t)addr, 'b');
+}
+
+static inline void
+qtest_write8(struct virtio_hw *hw, uint8_t val, uint8_t *addr)
+{
+	return qtest_write(hw, (uint64_t)addr, val, 'b');
+}
+
+static inline uint16_t
+qtest_read16(struct virtio_hw *hw, uint16_t *addr)
+{
+	return qtest_read(hw, (uint64_t)addr, 'w');
+}
+
+static inline void
+qtest_write16(struct virtio_hw *hw, uint16_t val, uint16_t *addr)
+{
+	return qtest_write(hw, (uint64_t)addr, val, 'w');
+}
+
+static inline uint32_t
+qtest_read32(struct virtio_hw *hw, uint32_t *addr)
+{
+	return qtest_read(hw, (uint64_t)addr, 'l');
+}
+
+static inline void
+qtest_write32(struct virtio_hw *hw, uint32_t val, uint32_t *addr)
+{
+	return qtest_write(hw, (uint64_t)addr, val, 'l');
+}
+
+static inline void
+qtest_write64_twopart(struct virtio_hw *hw,
+		uint64_t val, uint32_t *lo, uint32_t *hi)
+{
+	qtest_write32(hw, val & ((1ULL << 32) - 1), lo);
+	qtest_write32(hw, val >> 32,		     hi);
+}
+
+static void
+qtest_modern_read_dev_config(struct virtio_hw *hw, size_t offset,
+		       void *dst, int length)
+{
+	int i;
+	uint8_t *p;
+	uint8_t old_gen, new_gen;
+
+	do {
+		old_gen = qtest_read8(hw, &hw->common_cfg->config_generation);
+
+		p = dst;
+		for (i = 0;  i < length; i++)
+			*p++ = qtest_read8(hw, (uint8_t *)hw->dev_cfg + offset + i);
+
+		new_gen = qtest_read8(hw, &hw->common_cfg->config_generation);
+	} while (old_gen != new_gen);
+}
+
+static void
+qtest_modern_write_dev_config(struct virtio_hw *hw, size_t offset,
+			const void *src, int length)
+{
+	int i;
+	const uint8_t *p = src;
+
+	for (i = 0;  i < length; i++)
+		qtest_write8(hw, *p++, (uint8_t *)hw->dev_cfg + offset + i);
+}
+
+static uint64_t
+qtest_modern_get_features(struct virtio_hw *hw)
+{
+	uint32_t features_lo, features_hi;
+
+	qtest_write32(hw, 0, &hw->common_cfg->device_feature_select);
+	features_lo = qtest_read32(hw, &hw->common_cfg->device_feature);
+
+	qtest_write32(hw, 1, &hw->common_cfg->device_feature_select);
+	features_hi = qtest_read32(hw, &hw->common_cfg->device_feature);
+
+	return ((uint64_t)features_hi << 32) | features_lo;
+}
+
+static void
+qtest_modern_set_features(struct virtio_hw *hw, uint64_t features)
+{
+	qtest_write32(hw, 0, &hw->common_cfg->guest_feature_select);
+	qtest_write32(hw, features & ((1ULL << 32) - 1),
+		&hw->common_cfg->guest_feature);
+
+	qtest_write32(hw, 1, &hw->common_cfg->guest_feature_select);
+	qtest_write32(hw, features >> 32,
+		&hw->common_cfg->guest_feature);
+}
+
+static uint8_t
+qtest_modern_get_status(struct virtio_hw *hw)
+{
+	return qtest_read8(hw, &hw->common_cfg->device_status);
+}
+
+static void
+qtest_modern_set_status(struct virtio_hw *hw, uint8_t status)
+{
+	qtest_write8(hw, status, &hw->common_cfg->device_status);
+}
+
+static void
+qtest_modern_reset(struct virtio_hw *hw)
+{
+	modern_set_status(hw, VIRTIO_CONFIG_STATUS_RESET);
+	modern_get_status(hw);
+}
+
+static uint8_t
+qtest_modern_get_isr(struct virtio_hw *hw)
+{
+	return qtest_read8(hw, hw->isr);
+}
+
+static uint16_t
+qtest_modern_set_config_irq(struct virtio_hw *hw, uint16_t vec)
+{
+	qtest_write16(hw, vec, &hw->common_cfg->msix_config);
+	return qtest_read16(hw, &hw->common_cfg->msix_config);
+}
+
+static uint16_t
+qtest_modern_get_queue_num(struct virtio_hw *hw, uint16_t queue_id)
+{
+	qtest_write16(hw, queue_id, &hw->common_cfg->queue_select);
+	return qtest_read16(hw, &hw->common_cfg->queue_size);
+}
+
+static void
+qtest_modern_setup_queue(struct virtio_hw *hw, struct virtqueue *vq)
+{
+	uint64_t desc_addr, avail_addr, used_addr;
+	uint16_t notify_off;
+
+	desc_addr = (uint64_t)vq->mz->addr;
+	avail_addr = desc_addr + vq->vq_nentries * sizeof(struct vring_desc);
+	used_addr = RTE_ALIGN_CEIL(avail_addr + offsetof(struct vring_avail,
+							 ring[vq->vq_nentries]),
+				   VIRTIO_PCI_VRING_ALIGN);
+
+	qtest_write16(hw, vq->vq_queue_index, &hw->common_cfg->queue_select);
+
+	qtest_write64_twopart(hw, desc_addr, &hw->common_cfg->queue_desc_lo,
+				      &hw->common_cfg->queue_desc_hi);
+	qtest_write64_twopart(hw, avail_addr, &hw->common_cfg->queue_avail_lo,
+				       &hw->common_cfg->queue_avail_hi);
+	qtest_write64_twopart(hw, used_addr, &hw->common_cfg->queue_used_lo,
+				      &hw->common_cfg->queue_used_hi);
+
+	notify_off = qtest_read16(hw, &hw->common_cfg->queue_notify_off);
+	vq->notify_addr = (void *)((uint8_t *)hw->notify_base +
+				notify_off * hw->notify_off_multiplier);
+
+	qtest_write16(hw, 1, &hw->common_cfg->queue_enable);
+
+	PMD_INIT_LOG(DEBUG, "queue %u addresses:", vq->vq_queue_index);
+	PMD_INIT_LOG(DEBUG, "\t desc_addr: %" PRIx64, desc_addr);
+	PMD_INIT_LOG(DEBUG, "\t aval_addr: %" PRIx64, avail_addr);
+	PMD_INIT_LOG(DEBUG, "\t used_addr: %" PRIx64, used_addr);
+	PMD_INIT_LOG(DEBUG, "\t notify addr: %p (notify offset: %u)",
+		vq->notify_addr, notify_off);
+}
+
+static void
+qtest_modern_del_queue(struct virtio_hw *hw, struct virtqueue *vq)
+{
+	qtest_write16(hw, vq->vq_queue_index, &hw->common_cfg->queue_select);
+
+	qtest_write64_twopart(hw, 0, &hw->common_cfg->queue_desc_lo,
+				  &hw->common_cfg->queue_desc_hi);
+	qtest_write64_twopart(hw, 0, &hw->common_cfg->queue_avail_lo,
+				  &hw->common_cfg->queue_avail_hi);
+	qtest_write64_twopart(hw, 0, &hw->common_cfg->queue_used_lo,
+				  &hw->common_cfg->queue_used_hi);
+
+	qtest_write16(hw, 0, &hw->common_cfg->queue_enable);
+}
+
+static void
+qtest_modern_notify_queue(struct virtio_hw *hw __rte_unused, struct virtqueue *vq)
+{
+	qtest_write16(hw, 1, vq->notify_addr);
+}
+
+static const struct virtio_pci_ops qtest_modern_ops = {
+	.read_dev_cfg	= qtest_modern_read_dev_config,
+	.write_dev_cfg	= qtest_modern_write_dev_config,
+	.reset		= qtest_modern_reset,
+	.get_status	= qtest_modern_get_status,
+	.set_status	= qtest_modern_set_status,
+	.get_features	= qtest_modern_get_features,
+	.set_features	= qtest_modern_set_features,
+	.get_isr	= qtest_modern_get_isr,
+	.set_config_irq	= qtest_modern_set_config_irq,
+	.get_queue_num	= qtest_modern_get_queue_num,
+	.setup_queue	= qtest_modern_setup_queue,
+	.del_queue	= qtest_modern_del_queue,
+	.notify_queue	= qtest_modern_notify_queue,
+};
+#endif /* RTE_VIRTIO_VDEV_QTEST */
+
+
 void
 vtpci_read_dev_config(struct virtio_hw *hw, size_t offset,
 		      void *dst, int length)
@@ -513,12 +732,16 @@ vtpci_irq_config(struct virtio_hw *hw, uint16_t vec)
 }
 
 static void *
-get_cfg_addr(struct rte_pci_device *dev, struct virtio_pci_cap *cap)
+get_cfg_addr(struct rte_eth_dev *eth_dev,
+		struct virtio_hw *hw,
+		struct virtio_pci_cap *cap)
 {
+	struct rte_pci_device *pci_dev = eth_dev->pci_dev;
 	uint8_t  bar    = cap->bar;
 	uint32_t length = cap->length;
 	uint32_t offset = cap->offset;
-	uint8_t *base;
+	uint8_t *base = NULL;
+	uint64_t size = 0;
 
 	if (bar > 5) {
 		PMD_INIT_LOG(ERR, "invalid bar: %u", bar);
@@ -531,14 +754,27 @@ get_cfg_addr(struct rte_pci_device *dev, struct virtio_pci_cap *cap)
 		return NULL;
 	}
 
-	if (offset + length > dev->mem_resource[bar].len) {
+	if (virtio_dev_check(eth_dev, RTE_ETH_DEV_PCI, NULL, 0)) {
+		size = pci_dev->mem_resource[bar].len;
+		base = pci_dev->mem_resource[bar].addr;
+	}
+#ifdef RTE_VIRTIO_VDEV_QTEST
+	else if (virtio_dev_check(eth_dev, RTE_ETH_DEV_VIRTUAL,
+				QTEST_DRV_NAME, 0)) {
+		qtest_get_bar_size(hw, "virtio-net", bar, &size);
+		qtest_get_bar_addr(hw, "virtio-net", bar, (uint64_t **)&base);
+	}
+#else
+	RTE_SET_USED(hw);
+#endif
+
+	if (offset + length > size) {
 		PMD_INIT_LOG(ERR,
 			"invalid cap: overflows bar space: %u > %" PRIu64,
-			offset + length, dev->mem_resource[bar].len);
+			offset + length, size);
 		return NULL;
 	}
 
-	base = dev->mem_resource[bar].addr;
 	if (base == NULL) {
 		PMD_INIT_LOG(ERR, "bar %u base addr is NULL", bar);
 		return NULL;
@@ -548,25 +784,48 @@ get_cfg_addr(struct rte_pci_device *dev, struct virtio_pci_cap *cap)
 }
 
 static int
-virtio_read_caps(struct rte_pci_device *dev, struct virtio_hw *hw)
+virtio_read_pci_config(struct rte_eth_dev *eth_dev,
+			struct virtio_hw *hw,
+			void *buf, size_t len, off_t offset)
 {
+	struct rte_pci_device *pci_dev = eth_dev->pci_dev;
+	int ret = -1;
+
+	if (virtio_dev_check(eth_dev, RTE_ETH_DEV_PCI, NULL, 0))
+		ret = rte_eal_pci_read_config(pci_dev, buf, len, offset);
+#ifdef RTE_VIRTIO_VDEV_QTEST
+	else if (virtio_dev_check(eth_dev, RTE_ETH_DEV_VIRTUAL,
+				QTEST_DRV_NAME, 0))
+		ret = qtest_read_pci_cfg(hw, "virtio-net", buf, len, offset);
+#else
+	RTE_SET_USED(hw);
+#endif
+
+	return ret;
+}
+
+static int
+virtio_read_caps(struct rte_eth_dev *eth_dev, struct virtio_hw *hw)
+{
+	struct rte_pci_device *pci_dev = eth_dev->pci_dev;
 	uint8_t pos;
 	struct virtio_pci_cap cap;
 	int ret;
 
-	if (rte_eal_pci_map_device(dev)) {
+	if ((eth_dev->dev_type == RTE_ETH_DEV_PCI) &&
+			(rte_eal_pci_map_device(pci_dev) < 0)) {
 		PMD_INIT_LOG(DEBUG, "failed to map pci device!");
 		return -1;
 	}
 
-	ret = rte_eal_pci_read_config(dev, &pos, 1, PCI_CAPABILITY_LIST);
+	ret = virtio_read_pci_config(eth_dev, hw, &pos, 1, PCI_CAPABILITY_LIST);
 	if (ret < 0) {
 		PMD_INIT_LOG(DEBUG, "failed to read pci capability list");
 		return -1;
 	}
 
 	while (pos) {
-		ret = rte_eal_pci_read_config(dev, &cap, sizeof(cap), pos);
+		ret = virtio_read_pci_config(eth_dev, hw, &cap, sizeof(cap), pos);
 		if (ret < 0) {
 			PMD_INIT_LOG(ERR,
 				"failed to read pci cap at pos: %x", pos);
@@ -586,18 +845,19 @@ virtio_read_caps(struct rte_pci_device *dev, struct virtio_hw *hw)
 
 		switch (cap.cfg_type) {
 		case VIRTIO_PCI_CAP_COMMON_CFG:
-			hw->common_cfg = get_cfg_addr(dev, &cap);
+			hw->common_cfg = get_cfg_addr(eth_dev, hw, &cap);
 			break;
 		case VIRTIO_PCI_CAP_NOTIFY_CFG:
-			rte_eal_pci_read_config(dev, &hw->notify_off_multiplier,
+			virtio_read_pci_config(eth_dev, hw,
+						&hw->notify_off_multiplier,
 						4, pos + sizeof(cap));
-			hw->notify_base = get_cfg_addr(dev, &cap);
+			hw->notify_base = get_cfg_addr(eth_dev, hw, &cap);
 			break;
 		case VIRTIO_PCI_CAP_DEVICE_CFG:
-			hw->dev_cfg = get_cfg_addr(dev, &cap);
+			hw->dev_cfg = get_cfg_addr(eth_dev, hw, &cap);
 			break;
 		case VIRTIO_PCI_CAP_ISR_CFG:
-			hw->isr = get_cfg_addr(dev, &cap);
+			hw->isr = get_cfg_addr(eth_dev, hw, &cap);
 			break;
 		}
 
@@ -622,31 +882,77 @@ next:
 	return 0;
 }
 
+static int
+vtpci_modern_init(struct rte_eth_dev *eth_dev, struct virtio_hw *hw)
+{
+	struct rte_pci_device *pci_dev = eth_dev->pci_dev;
+
+	PMD_INIT_LOG(INFO, "modern virtio pci detected.");
+
+	if (virtio_dev_check(eth_dev, RTE_ETH_DEV_PCI, NULL, 0)) {
+		hw->vtpci_ops = &modern_ops;
+		pci_dev->driver->drv_flags |= RTE_PCI_DRV_INTR_LSC;
+	}
+#ifdef RTE_VIRTIO_VDEV_QTEST
+	else if (virtio_dev_check(eth_dev, RTE_ETH_DEV_VIRTUAL, NULL, 0)) {
+		hw->vtpci_ops = &qtest_modern_ops;
+		eth_dev->data->dev_flags |= RTE_ETH_DEV_INTR_LSC;
+	}
+#endif
+
+	hw->modern = 1;
+
+	return 0;
+}
+
+static int
+vtpci_legacy_init(struct rte_eth_dev *eth_dev, struct virtio_hw *hw)
+{
+	struct rte_pci_device *pci_dev = eth_dev->pci_dev;
+	struct virtio_pci_cap cap;
+
+	PMD_INIT_LOG(INFO, "trying with legacy virtio pci.");
+	if (virtio_dev_check(eth_dev, RTE_ETH_DEV_PCI, NULL, 0)) {
+		if (legacy_virtio_resource_init(pci_dev, hw) < 0)
+			return -1;
+
+		hw->vtpci_ops = &legacy_ops;
+		hw->use_msix = legacy_virtio_has_msix(&pci_dev->addr);
+	}
+#ifdef RTE_VIRTIO_VDEV_QTEST
+	else if (virtio_dev_check(eth_dev, RTE_ETH_DEV_VIRTUAL,
+				QTEST_DRV_NAME, 0)) {
+		PMD_INIT_LOG(ERR, "Legacy virtio device isn't supported.");
+		return -1;
+	}
+#endif
+
+	cap.bar = cap.length = cap.offset = 0;
+	hw->modern = 0;
+
+	return 0;
+}
+
 int
-vtpci_init(struct rte_pci_device *dev, struct virtio_hw *hw)
+vtpci_init(struct rte_eth_dev *eth_dev, struct virtio_hw *hw)
 {
-	hw->dev = dev;
+	struct rte_pci_device *pci_dev = eth_dev->pci_dev;
+	int ret;
+
+	hw->dev = pci_dev;
 
 	/*
 	 * Try if we can succeed reading virtio pci caps, which exists
 	 * only on modern pci device. If failed, we fallback to legacy
 	 * virtio handling.
 	 */
-	if (virtio_read_caps(dev, hw) == 0) {
-		PMD_INIT_LOG(INFO, "modern virtio pci detected.");
-		hw->vtpci_ops = &modern_ops;
-		hw->modern    = 1;
-		dev->driver->drv_flags |= RTE_PCI_DRV_INTR_LSC;
-		return 0;
-	}
+	if (virtio_read_caps(eth_dev, hw) == 0)
+		ret = vtpci_modern_init(eth_dev, hw);
+	else
+		ret = vtpci_legacy_init(eth_dev, hw);
 
-	PMD_INIT_LOG(INFO, "trying with legacy virtio pci.");
-	if (legacy_virtio_resource_init(dev, hw) < 0)
+	if (ret < 0)
 		return -1;
 
-	hw->vtpci_ops = &legacy_ops;
-	hw->use_msix = legacy_virtio_has_msix(&dev->addr);
-	hw->modern   = 0;
-
 	return 0;
 }
diff --git a/drivers/net/virtio/virtio_pci.h b/drivers/net/virtio/virtio_pci.h
index ae6777d..41268a7 100644
--- a/drivers/net/virtio/virtio_pci.h
+++ b/drivers/net/virtio/virtio_pci.h
@@ -242,6 +242,9 @@ struct virtio_net_config;
 
 struct virtio_hw {
 	struct virtqueue *cvq;
+#ifdef RTE_VIRTIO_VDEV_QTEST
+	void        *qsession;
+#endif
 	struct rte_pci_ioport io;
 	uint64_t    guest_features;
 	uint32_t    max_tx_queues;
@@ -306,7 +309,7 @@ vtpci_with_feature(struct virtio_hw *hw, uint64_t bit)
 /*
  * Function declaration from virtio_pci.c
  */
-int vtpci_init(struct rte_pci_device *, struct virtio_hw *);
+int vtpci_init(struct rte_eth_dev *, struct virtio_hw *);
 void vtpci_reset(struct virtio_hw *);
 
 void vtpci_reinit_complete(struct virtio_hw *);
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 120+ messages in thread

* [PATCH v3 6/6] docs: add release note for qtest virtio container support
  2016-02-10  3:40     ` [PATCH v2 2/5] EAL: Add new EAL "--qtest-virtio" option Tetsuya Mukawa
                         ` (7 preceding siblings ...)
  2016-02-22  8:17       ` [PATCH v3 5/6] virtio: Add support for qtest virtio-net PMD Tetsuya Mukawa
@ 2016-02-22  8:17       ` Tetsuya Mukawa
  2016-02-22 15:40         ` Mcnamara, John
  8 siblings, 1 reply; 120+ messages in thread
From: Tetsuya Mukawa @ 2016-02-22  8:17 UTC (permalink / raw)
  To: dev

Signed-off-by: Tetsuya Mukawa <mukawa@igel.co.jp>
---
 doc/guides/rel_notes/release_16_04.rst | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/doc/guides/rel_notes/release_16_04.rst b/doc/guides/rel_notes/release_16_04.rst
index 197d4e1..4c6d528 100644
--- a/doc/guides/rel_notes/release_16_04.rst
+++ b/doc/guides/rel_notes/release_16_04.rst
@@ -50,6 +50,9 @@ This section should contain new features added in this release. Sample format:
 
   Add a new virtual device, named eth_cvio, to support virtio for containers.
 
+* **Virtio support for containers using QEMU qtest mode.**
+  Add a new virtual device, named eth_qtest_virtio, to support virtio for containers
+  using QEMU qtest mode.
 
 Resolved Issues
 ---------------
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 120+ messages in thread

* Re: [PATCH v3 6/6] docs: add release note for qtest virtio container support
  2016-02-22  8:17       ` [PATCH v3 6/6] docs: add release note for qtest virtio container support Tetsuya Mukawa
@ 2016-02-22 15:40         ` Mcnamara, John
  2016-02-23 10:28           ` Mcnamara, John
  0 siblings, 1 reply; 120+ messages in thread
From: Mcnamara, John @ 2016-02-22 15:40 UTC (permalink / raw)
  To: Tetsuya Mukawa, dev

> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Tetsuya Mukawa
> Sent: Monday, February 22, 2016 8:18 AM
> To: dev@dpdk.org
> Subject: [dpdk-dev] [PATCH v3 6/6] docs: add release note for qtest virtio
> container support
> ...
> 
> +* **Virtio support for containers using QEMU qtest mode.**
> +  Add a new virtual device, named eth_qtest_virtio, to support virtio for
> containers
> +  using QEMU qtest mode.

There should be a newline after the title and both the title and the description should be in the past tense, as explained in the inline example.

However, no need to respin the patch for this.

Acked-by: John McNamara <john.mcnamara@intel.com>

^ permalink raw reply	[flat|nested] 120+ messages in thread

* Re: [PATCH v3 6/6] docs: add release note for qtest virtio container support
  2016-02-22 15:40         ` Mcnamara, John
@ 2016-02-23 10:28           ` Mcnamara, John
  2016-02-24  1:20             ` Tetsuya Mukawa
  0 siblings, 1 reply; 120+ messages in thread
From: Mcnamara, John @ 2016-02-23 10:28 UTC (permalink / raw)
  To: Mcnamara, John, Tetsuya Mukawa, dev

> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Mcnamara, John
> Sent: Monday, February 22, 2016 3:41 PM
> To: Tetsuya Mukawa <mukawa@igel.co.jp>; dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH v3 6/6] docs: add release note for qtest
> virtio container support
> 

Also, could you move the v2 patchset to "Superseded".

^ permalink raw reply	[flat|nested] 120+ messages in thread

* Re: [PATCH v3 6/6] docs: add release note for qtest virtio container support
  2016-02-23 10:28           ` Mcnamara, John
@ 2016-02-24  1:20             ` Tetsuya Mukawa
  0 siblings, 0 replies; 120+ messages in thread
From: Tetsuya Mukawa @ 2016-02-24  1:20 UTC (permalink / raw)
  To: Mcnamara, John, dev

On 2016/02/23 19:28, Mcnamara, John wrote:
>> -----Original Message-----
>> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Mcnamara, John
>> Sent: Monday, February 22, 2016 3:41 PM
>> To: Tetsuya Mukawa <mukawa@igel.co.jp>; dev@dpdk.org
>> Subject: Re: [dpdk-dev] [PATCH v3 6/6] docs: add release note for qtest
>> virtio container support
>>
> Also, could you move the v2 patchset to "Superseded".
>

Thanks for your reviewing.
I will change v2 patches status to "Superseded".

Thanks,
Tetsuya

^ permalink raw reply	[flat|nested] 120+ messages in thread

* Re: [PATCH v3 5/6] virtio: Add support for qtest virtio-net PMD
  2016-02-22  8:17       ` [PATCH v3 5/6] virtio: Add support for qtest virtio-net PMD Tetsuya Mukawa
@ 2016-03-04  2:18         ` Tan, Jianfeng
  2016-03-04  5:05           ` Tetsuya Mukawa
  0 siblings, 1 reply; 120+ messages in thread
From: Tan, Jianfeng @ 2016-03-04  2:18 UTC (permalink / raw)
  To: Tetsuya Mukawa, dev

Hi Tetsuya,

Seems that this patch is too long. Is it possible to split into multiple 
commits?

On 2/22/2016 4:17 PM, Tetsuya Mukawa wrote:
> The patch adds a new virtio-net PMD configuration that allows the PMD to
> work on host as if the PMD is in VM.
> Here is new configuration for virtio-net PMD.
>   - CONFIG_RTE_VIRTIO_VDEV_QTEST
> To use this mode, EAL needs map all hugepages as one file. Also the file
> should be mapped between (1 << 31) and (1 << 44). And start address
> should be aligned by EAL memory size.
>
> To allocate like above, use below options.
>   --single-file
>   --range-virtaddr=0x80000000-0x100000000000
>   --align-memsize
> If a free regions isn't found, EAL will return error.
>
> To prepare virtio-net device on host, the users need to invoke QEMU
> process in special qtest mode. This mode is mainly used for testing QEMU
> devices from outer process. In this mode, no guest runs.
> Here is QEMU command line.
>
>   $ qemu-system-x86_64 \
>       -machine pc-i440fx-1.4,accel=qtest \
>       -display none -qtest-log /dev/null \
>       -qtest unix:/tmp/socket,server \
>       -netdev type=tap,script=/etc/qemu-ifup,id=net0,queues=1 \
>       -device virtio-net-pci,netdev=net0,mq=on,disable-modern=false,addr=3 \
>       -chardev socket,id=chr1,path=/tmp/ivshmem,server \
>       -device ivshmem,size=1G,chardev=chr1,vectors=1,addr=4
>
>   * Should use qemu-2.5.1, or above.
>   * QEMU process is needed per port.
>   * virtio-1.0 device are only supported.
>   * The vhost backends like vhost-net and vhost-user can be specified.
>   * In most cases, just using above command is enough, but you can also
>     specify other QEMU virtio-net options.
>   * Only checked "pc-i440fx-1.4" machine, but may work with other
>     machines.
>   * Should not add "--enable-kvm" to QEMU command line.

Correct me if wrong: all control msgs go through qemu process, e.g., tx 
notifications and rx interrupts need follow frontend-qemu-backend path. 
Question: qemu is started without --enable-kvm, as I understand, 
ioeventfd, the basis of kickfd/callfd, will not be available. So how 
does qemu kick backend or be kicked by backend?

>
> After invoking QEMU, the PMD can connect to QEMU process using unix
> domain sockets. Over these sockets, virtio-net, ivshmem and piix3
> device in QEMU are probed by the PMD.
> Here is example of command line.
>
>   $ testpmd -c f -n 1 -m 1024 --no-pci --single-file \
>        --range-virtaddr=0x80000000-0x100000000000 --align-memsize \
>        --vdev="eth_qtest_virtio0,qtest=/tmp/socket,ivshmem=/tmp/ivshmem"\
>        -- --disable-hw-vlan --txqflags=0xf00 -i
>
> Please specify same unix domain sockets and memory size in both QEMU
> and DPDK command lines like above.
> The share memory size should be power of 2, because ivshmem only
> accepts such memory size.
>
> Signed-off-by: Tetsuya Mukawa <mukawa@igel.co.jp>
> ---
>   config/common_linuxapp             |    1 +
>   drivers/net/virtio/Makefile        |    4 +
>   drivers/net/virtio/qtest.c         | 1342 ++++++++++++++++++++++++++++++++++++
>   drivers/net/virtio/qtest.h         |   65 ++
>   drivers/net/virtio/virtio_ethdev.c |  383 +++++++++-
>   drivers/net/virtio/virtio_pci.c    |  364 +++++++++-
>   drivers/net/virtio/virtio_pci.h    |    5 +-
>   7 files changed, 2122 insertions(+), 42 deletions(-)
>   create mode 100644 drivers/net/virtio/qtest.c
>   create mode 100644 drivers/net/virtio/qtest.h
>
> diff --git a/config/common_linuxapp b/config/common_linuxapp
> index 452f39c..f6e53bc 100644
> --- a/config/common_linuxapp
> +++ b/config/common_linuxapp
> @@ -533,3 +533,4 @@ CONFIG_RTE_TEST_PMD_RECORD_BURST_STATS=n
>   # Enable virtio support for container
>   #
>   CONFIG_RTE_VIRTIO_VDEV=y
> +CONFIG_RTE_VIRTIO_VDEV_QTEST=y
> diff --git a/drivers/net/virtio/Makefile b/drivers/net/virtio/Makefile
> index ef920f9..6c11378 100644
> --- a/drivers/net/virtio/Makefile
> +++ b/drivers/net/virtio/Makefile
> @@ -56,6 +56,10 @@ ifeq ($(CONFIG_RTE_VIRTIO_VDEV),y)
>   	SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += vhost_embedded.c
>   endif
>   
> +ifeq ($(CONFIG_RTE_VIRTIO_VDEV_QTEST),y)
> +	SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += qtest.c
> +endif
> +
>   # this lib depends upon:
>   DEPDIRS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += lib/librte_eal lib/librte_ether
>   DEPDIRS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += lib/librte_mempool lib/librte_mbuf
> diff --git a/drivers/net/virtio/qtest.c b/drivers/net/virtio/qtest.c
> new file mode 100644
> index 0000000..061aab5
> --- /dev/null
> +++ b/drivers/net/virtio/qtest.c
> @@ -0,0 +1,1342 @@
> +/*-
> + *   BSD LICENSE
> + *
> + *   Copyright(c) 2016 IGEL Co., Ltd. All rights reserved.
> + *   All rights reserved.
> + *
> + *   Redistribution and use in source and binary forms, with or without
> + *   modification, are permitted provided that the following conditions
> + *   are met:
> + *
> + *     * Redistributions of source code must retain the above copyright
> + *       notice, this list of conditions and the following disclaimer.
> + *     * Redistributions in binary form must reproduce the above copyright
> + *       notice, this list of conditions and the following disclaimer in
> + *       the documentation and/or other materials provided with the
> + *       distribution.
> + *     * Neither the name of IGEL Co., Ltd. nor the names of its
> + *       contributors may be used to endorse or promote products derived
> + *       from this software without specific prior written permission.
> + *
> + *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> + *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> + *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> + *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> + *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> + *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> + *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> + *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> + *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> + *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> + *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> + */
> +#include <stdint.h>
> +#include <stdlib.h>
> +#include <string.h>
> +#include <unistd.h>
> +#include <sys/types.h>
> +#include <sys/socket.h>
> +#include <sys/un.h>
> +#include <sys/queue.h>
> +#include <signal.h>
> +#include <pthread.h>
> +#include <sys/stat.h>
> +#include <fcntl.h>
> +#include <sys/eventfd.h>
> +#include <linux/pci_regs.h>
> +
> +#include <rte_memory.h>
> +#include <rte_malloc.h>
> +#include <rte_common.h>
> +#include <rte_interrupts.h>
> +
> +#include "virtio_pci.h"
> +#include "virtio_logs.h"
> +#include "virtio_ethdev.h"
> +#include "qtest.h"
> +
> +#define NB_BAR                          6
> +
> +/* PIIX3 configuration registers */
> +#define PIIX3_REG_ADDR_PIRQA            0x60
> +#define PIIX3_REG_ADDR_PIRQB            0x61
> +#define PIIX3_REG_ADDR_PIRQC            0x62
> +#define PIIX3_REG_ADDR_PIRQD            0x63
> +
> +/* Device information */
> +#define VIRTIO_NET_DEVICE_ID            0x1000
> +#define VIRTIO_NET_VENDOR_ID            0x1af4
> +#define VIRTIO_NET_IRQ_NUM              10
> +#define IVSHMEM_DEVICE_ID               0x1110
> +#define IVSHMEM_VENDOR_ID               0x1af4
> +#define IVSHMEM_PROTOCOL_VERSION        0
> +#define PIIX3_DEVICE_ID                 0x7000
> +#define PIIX3_VENDOR_ID                 0x8086
> +
> +/* ------------------------------------------------------------
> + * IO port mapping of qtest guest
> + * ------------------------------------------------------------
> + * 0x0000 - 0xbfff : not used
> + * 0xc000 - 0xc03f : virtio-net(BAR0)
> + * 0xc040 - 0xffff : not used
> + *
> + * ------------------------------------------------------------
> + * Memory mapping of qtest quest
> + * ------------------------------------------------------------
> + * 0x00000000_00000000 - 0x00000000_3fffffff : not used
> + * 0x00000000_40000000 - 0x00000000_40000fff : virtio-net(BAR1)
> + * 0x00000000_40001000 - 0x00000000_40ffffff : not used
> + * 0x00000000_41000000 - 0x00000000_417fffff : virtio-net(BAR4)
> + * 0x00000000_41800000 - 0x00000000_41ffffff : not used
> + * 0x00000000_42000000 - 0x00000000_420000ff : ivshmem(BAR0)
> + * 0x00000000_42000100 - 0x00000000_42ffffff : not used
> + * 0x00000000_80000000 - 0xffffffff_ffffffff : ivshmem(BAR2)
> + *

Is it possible to arrange multiple virtio-net devices here? What's the 
challenges?

Seems that lots of below code do the same work as libqos. So can we just 
link libqos? Or we need to maintain this code.

> + * We can only specify start address of a region. The region size
> + * will be defined by the device implementation in QEMU.
> + * The size will be pow of 2 according to the PCI specification.
> + * Also, the region start address should be aligned by region size.
> + *
> + * BAR2 of ivshmem will be used to mmap DPDK application memory.
> + * So this address will be dynamically changed, but not to overlap
> + * others, it should be mmaped between above addresses. Such allocation
> + * is done by EAL. Check rte_eal_get_free_region() also.
> + */
> +#define VIRTIO_NET_IO_START             0xc000
> +#define VIRTIO_NET_MEMORY1_START	0x40000000
> +#define VIRTIO_NET_MEMORY2_START	0x41000000
> +#define IVSHMEM_MEMORY_START            0x42000000
> +
> +#define PCI_CONFIG_ADDR(_bus, _device, _function, _offset) ( \
> +	(1 << 31) | ((_bus) & 0xff) << 16 | ((_device) & 0x1f) << 11 | \
> +	((_function) & 0x7) << 8 | ((_offset) & 0xfc))
> +
> +static char interrupt_message[32];
> +
> +enum qtest_pci_bar_type {
> +	QTEST_PCI_BAR_DISABLE = 0,
> +	QTEST_PCI_BAR_IO,
> +	QTEST_PCI_BAR_MEMORY_UNDER_1MB,
> +	QTEST_PCI_BAR_MEMORY_32,
> +	QTEST_PCI_BAR_MEMORY_64
> +};
> +
> +struct qtest_pci_bar {
> +	enum qtest_pci_bar_type type;
> +	uint8_t addr;
> +	uint64_t region_start;
> +	uint64_t region_size;
> +};
> +
> +struct qtest_session;
> +TAILQ_HEAD(qtest_pci_device_list, qtest_pci_device);
> +struct qtest_pci_device {
> +	TAILQ_ENTRY(qtest_pci_device) next;
> +	const char *name;
> +	uint16_t device_id;
> +	uint16_t vendor_id;
> +	uint8_t bus_addr;
> +	uint8_t device_addr;
> +	struct qtest_pci_bar bar[NB_BAR];
> +	int (*init)(struct qtest_session *s, struct qtest_pci_device *dev);
> +};
> +
> +union qtest_pipefds {
> +	struct {
> +		int pipefd[2];
> +	};
> +	struct {
> +		int readfd;
> +		int writefd;
> +	};
> +};
> +
> +struct qtest_session {
> +	int qtest_socket;
> +	pthread_mutex_t qtest_session_lock;
> +
> +	struct qtest_pci_device_list head;
> +	int ivshmem_socket;
> +
> +	pthread_t event_th;
> +	char *evq;
> +	char *evq_dequeue_ptr;
> +	size_t evq_total_len;
> +
> +	union qtest_pipefds msgfds;
> +
> +	pthread_t intr_th;
> +	int eventfd;
> +	rte_atomic16_t enable_intr;
> +	rte_intr_callback_fn cb;
> +	void *cb_arg;
> +	struct rte_eth_dev_data *eth_data;
> +};
> +
> +static int
> +qtest_raw_send(int fd, char *buf, size_t count)
> +{
> +	size_t len = count;
> +	size_t total_len = 0;
> +	int ret = 0;
> +
> +	while (len > 0) {
> +		ret = write(fd, buf, len);
> +		if (ret == -1) {
> +			if (errno == EINTR)
> +				continue;
> +			return ret;
> +		}
> +		if (ret == (int)len)
> +			break;

This _if_ complicates this function, remove this and rely on while 
condition to quit? Same as in the below qtest_raw_recv().

Thanks,
Jianfeng

> +		total_len += ret;
> +		buf += ret;
> +		len -= ret;
> +	}
> +	return total_len + ret;
> +}
> +
> +static int
> +qtest_raw_recv(int fd, char *buf, size_t count)
> +{
> +	size_t len = count;
> +	size_t total_len = 0;
> +	int ret = 0;
> +
> +	while (len > 0) {
> +		ret = read(fd, buf, len);
> +		if (ret <= 0) {
> +			if (errno == EINTR) {
> +				continue;
> +			}
> +			return ret;
> +		}
> +		if (ret == (int)len)
> +			break;
> +		if (*(buf + ret - 1) == '\n')
> +			break;
> +		total_len += ret;
> +		buf += ret;
> +		len -= ret;
> +	}
> +	return total_len + ret;
> +}
> +
[...]

^ permalink raw reply	[flat|nested] 120+ messages in thread

* Re: [PATCH v3 3/6] EAL: Add new EAL "--range-virtaddr" option
  2016-02-22  8:17       ` [PATCH v3 3/6] EAL: Add new EAL "--range-virtaddr" option Tetsuya Mukawa
@ 2016-03-04  2:20         ` Tan, Jianfeng
  2016-03-09  8:33         ` [PATCH v4 00/12] Virtio-net PMD: QEMU QTest extension for container Tetsuya Mukawa
                           ` (12 subsequent siblings)
  13 siblings, 0 replies; 120+ messages in thread
From: Tan, Jianfeng @ 2016-03-04  2:20 UTC (permalink / raw)
  To: Tetsuya Mukawa, dev

Hi Tetsuya,

On 2/22/2016 4:17 PM, Tetsuya Mukawa wrote:
> The option specifies how to mmap EAL memory.
> If the option is specified like '--range-virtaddr=<addr1>-<addr2>',
> EAL will check /proc/maps, then tries to find free region between addr1
> and addr2. If a region is found, EAL will treat it as if 'base-virtaddr'
> is specified. Because of this, the option will not work with
> '--base-virtaddr'.
>
> Signed-off-by: Tetsuya Mukawa <mukawa@igel.co.jp>
> ---
>   lib/librte_eal/common/eal_common_options.c |  9 ++++
>   lib/librte_eal/common/eal_internal_cfg.h   |  2 +
>   lib/librte_eal/common/eal_options.h        |  2 +
>   lib/librte_eal/linuxapp/eal/eal.c          | 39 ++++++++++++++
>   lib/librte_eal/linuxapp/eal/eal_memory.c   | 82 +++++++++++++++++++++++++++++-
>   5 files changed, 133 insertions(+), 1 deletion(-)
>
> diff --git a/lib/librte_eal/common/eal_common_options.c b/lib/librte_eal/common/eal_common_options.c
> index 65bccbd..3b4f789 100644
> --- a/lib/librte_eal/common/eal_common_options.c
> +++ b/lib/librte_eal/common/eal_common_options.c
> @@ -74,6 +74,7 @@ eal_short_options[] =
>   const struct option
>   eal_long_options[] = {
>   	{OPT_BASE_VIRTADDR,     1, NULL, OPT_BASE_VIRTADDR_NUM    },
> +	{OPT_RANGE_VIRTADDR,    1, NULL, OPT_RANGE_VIRTADDR_NUM   },
>   	{OPT_CREATE_UIO_DEV,    0, NULL, OPT_CREATE_UIO_DEV_NUM   },
>   	{OPT_FILE_PREFIX,       1, NULL, OPT_FILE_PREFIX_NUM      },
>   	{OPT_HELP,              0, NULL, OPT_HELP_NUM             },
> @@ -137,6 +138,8 @@ eal_reset_internal_config(struct internal_config *internal_cfg)
>   	for (i = 0; i < MAX_HUGEPAGE_SIZES; i++)
>   		internal_cfg->hugepage_info[i].lock_descriptor = -1;
>   	internal_cfg->base_virtaddr = 0;
> +	internal_cfg->range_virtaddr_start = 0;
> +	internal_cfg->range_virtaddr_end = 0;
>   
>   	internal_cfg->syslog_facility = LOG_DAEMON;
>   	/* default value from build option */
> @@ -985,6 +988,12 @@ eal_check_common_options(struct internal_config *internal_cfg)
>   		return -1;
>   	}
>   
> +	if (internal_cfg->base_virtaddr && internal_cfg->range_virtaddr_end) {
> +		RTE_LOG(ERR, EAL, "Option --"OPT_RANGE_VIRTADDR" cannot "
> +			"be specified together with --"OPT_BASE_VIRTADDR"\n");
> +		return -1;
> +	}
> +
>   	return 0;
>   }
>   
> diff --git a/lib/librte_eal/common/eal_internal_cfg.h b/lib/librte_eal/common/eal_internal_cfg.h
> index 9117ed9..0734630 100644
> --- a/lib/librte_eal/common/eal_internal_cfg.h
> +++ b/lib/librte_eal/common/eal_internal_cfg.h
> @@ -78,6 +78,8 @@ struct internal_config {
>   	volatile unsigned force_sockets;
>   	volatile uint64_t socket_mem[RTE_MAX_NUMA_NODES]; /**< amount of memory per socket */
>   	uintptr_t base_virtaddr;          /**< base address to try and reserve memory from */
> +	uintptr_t range_virtaddr_start;   /**< start address of mappable region */
> +	uintptr_t range_virtaddr_end;     /**< end address of mappable region */
>   	volatile int syslog_facility;	  /**< facility passed to openlog() */
>   	volatile uint32_t log_level;	  /**< default log level */
>   	/** default interrupt mode for VFIO */
> diff --git a/lib/librte_eal/common/eal_options.h b/lib/librte_eal/common/eal_options.h
> index e5da14a..8e4cf1d 100644
> --- a/lib/librte_eal/common/eal_options.h
> +++ b/lib/librte_eal/common/eal_options.h
> @@ -47,6 +47,8 @@ enum {
>   	OPT_LONG_MIN_NUM = 256,
>   #define OPT_BASE_VIRTADDR     "base-virtaddr"
>   	OPT_BASE_VIRTADDR_NUM,
> +#define OPT_RANGE_VIRTADDR    "range-virtaddr"
> +	OPT_RANGE_VIRTADDR_NUM,
>   #define OPT_CREATE_UIO_DEV    "create-uio-dev"
>   	OPT_CREATE_UIO_DEV_NUM,
>   #define OPT_FILE_PREFIX       "file-prefix"
> diff --git a/lib/librte_eal/linuxapp/eal/eal.c b/lib/librte_eal/linuxapp/eal/eal.c
> index 82f34f7..80f1995 100644
> --- a/lib/librte_eal/linuxapp/eal/eal.c
> +++ b/lib/librte_eal/linuxapp/eal/eal.c
> @@ -444,6 +444,35 @@ eal_parse_base_virtaddr(const char *arg)
>   }
>   
>   static int
> +eal_parse_range_virtaddr(const char *range)
> +{
> +	char *p, *endptr;
> +	uint64_t tmp_start, tmp_end;
> +
> +	p = strchr(range, '-');
> +	if (p == NULL)
> +		return -1;
> +	*p++ = '\0';
> +
> +	errno = 0;
> +	tmp_start = strtoul(range, &endptr, 0);
> +	if ((errno != 0) || endptr == NULL || (*endptr != '\0'))
> +		return -1;
> +
> +	tmp_end = strtoul(p, &endptr, 0);
> +	if ((errno != 0) || endptr == NULL || (*endptr != '\0'))
> +		return -1;
> +
> +	if (tmp_start >= tmp_end)
> +		return -1;
> +
> +	internal_config.range_virtaddr_start = tmp_start;
> +	internal_config.range_virtaddr_end = tmp_end;
> +
> +	return 0;
> +}
> +
> +static int
>   eal_parse_vfio_intr(const char *mode)
>   {
>   	unsigned i;
> @@ -604,6 +633,16 @@ eal_parse_args(int argc, char **argv)
>   			}
>   			break;
>   
> +		case OPT_RANGE_VIRTADDR_NUM:
> +			if (eal_parse_range_virtaddr(optarg) < 0) {
> +				RTE_LOG(ERR, EAL, "invalid parameter for --"
> +						OPT_RANGE_VIRTADDR "\n");
> +				eal_usage(prgname);
> +				ret = -1;
> +				goto out;
> +			}
> +			break;
> +
>   		case OPT_VFIO_INTR_NUM:
>   			if (eal_parse_vfio_intr(optarg) < 0) {
>   				RTE_LOG(ERR, EAL, "invalid parameters for --"
> diff --git a/lib/librte_eal/linuxapp/eal/eal_memory.c b/lib/librte_eal/linuxapp/eal/eal_memory.c
> index a6b3616..d608273 100644
> --- a/lib/librte_eal/linuxapp/eal/eal_memory.c
> +++ b/lib/librte_eal/linuxapp/eal/eal_memory.c
> @@ -251,6 +251,66 @@ aslr_enabled(void)
>   }
>   
>   /*
> + * Find memory space that fits user request.
> + */
> +static uintptr_t
> +rte_eal_get_free_region(uint64_t pagesz)
> +{
> +	uint64_t alloc_size, start, end, next_start;
> +	uint64_t low_limit, high_limit;
> +	uintptr_t addr = 0;
> +	char buf[1024], *p;
> +	FILE *fp;
> +
> +	alloc_size = internal_config.memory;
> +	low_limit = internal_config.range_virtaddr_start;
> +	high_limit = internal_config.range_virtaddr_end;
> +
> +	/* allocation size should be aligned by page size */
> +	if (alloc_size != RTE_ALIGN_CEIL(alloc_size, pagesz)) {
> +		rte_panic("Invalid allocation size 0x%lx\n", alloc_size);
> +		return NULL;

This line causes compiling error:
lib/librte_eal/linuxapp/eal/eal_memory.c:272:3: error: return makes 
integer from pointer without a cast [-Werror]
    return NULL;
    ^


Thanks,
Jianfeng

> +	}
> +
> +	fp = fopen("/proc/self/maps", "r");
> +	if (fp == NULL) {
> +		rte_panic("Cannot open /proc/self/maps\n");
> +		return NULL;

Ditto.

Thanks,
Jianfeng

> +	}
> +
> +	next_start = 0;
> +	do {
> +		start = next_start;
> +
> +		if ((p = fgets(buf, sizeof(buf), fp)) != NULL) {
> +			if (sscanf(p, "%lx-%lx ", &end, &next_start) < 2)
> +				break;
> +
> +			next_start = RTE_ALIGN_CEIL(next_start, alloc_size);
> +			end = RTE_ALIGN_CEIL(end, alloc_size) - 1;
> +		} else
> +			end = UINT64_MAX;
> +
> +		if (start >= high_limit)
> +			break;
> +		if (end < low_limit)
> +			continue;
> +
> +		start = RTE_MAX(start, low_limit);
> +		end = RTE_MIN(end, high_limit - 1);
> +
> +		if (end - start >= alloc_size - 1) {
> +			addr = start;
> +			break;
> +		}
> +	} while (end != UINT64_MAX);
> +
> +	fclose(fp);
> +
> +	return addr;
> +}
> +
> +/*
>    * Try to mmap *size bytes in /dev/zero. If it is successful, return the
>    * pointer to the mmap'd area and keep *size unmodified. Else, retry
>    * with a smaller zone: decrease *size by hugepage_sz until it reaches
> @@ -1126,6 +1186,25 @@ rte_eal_hugepage_init(void)
>   	/* get pointer to global configuration */
>   	mcfg = rte_eal_get_configuration()->mem_config;
>   
> +	if (internal_config.range_virtaddr_end) {
> +		uint64_t pagesize = RTE_PGSIZE_4K;
> +		struct hugepage_info *hpi;
> +		unsigned n;
> +		uintptr_t addr;
> +
> +		/* determine maximum hugepage size */
> +		for (n = 0; n < internal_config.num_hugepage_sizes; n++) {
> +			hpi = &internal_config.hugepage_info[n];
> +			pagesize = RTE_MAX(hpi->hugepage_sz, pagesize);
> +		}
> +
> +		addr = rte_eal_get_free_region(pagesize);
> +		if (addr == 0)
> +			RTE_LOG(WARNING, EAL,
> +				"no free space to mmap in specified region\n");
> +		internal_config.base_virtaddr = addr;
> +	}
> +
>   	/* when hugetlbfs is disabled or single-file option is specified */
>   	if (internal_config.no_hugetlbfs || internal_config.single_file) {
>   		int fd;
> @@ -1158,7 +1237,8 @@ rte_eal_hugepage_init(void)
>   			return -1;
>   		}
>   
> -		addr = mmap(NULL, internal_config.memory,
> +		addr = mmap((void *)internal_config.base_virtaddr,
> +			    internal_config.memory,
>   			    PROT_READ | PROT_WRITE,
>   			    MAP_SHARED | MAP_POPULATE, fd, 0);
>   		if (addr == MAP_FAILED) {

^ permalink raw reply	[flat|nested] 120+ messages in thread

* Re: [PATCH v3 5/6] virtio: Add support for qtest virtio-net PMD
  2016-03-04  2:18         ` Tan, Jianfeng
@ 2016-03-04  5:05           ` Tetsuya Mukawa
  2016-03-04  6:10             ` Tan, Jianfeng
  0 siblings, 1 reply; 120+ messages in thread
From: Tetsuya Mukawa @ 2016-03-04  5:05 UTC (permalink / raw)
  To: Tan, Jianfeng, dev

On 2016/03/04 11:18, Tan, Jianfeng wrote:
> Hi Tetsuya,
>
> Seems that this patch is too long. Is it possible to split into
> multiple commits?

Hi Jianfeng,

Sure, will do.

>
> On 2/22/2016 4:17 PM, Tetsuya Mukawa wrote:
>> The patch adds a new virtio-net PMD configuration that allows the PMD to
>> work on host as if the PMD is in VM.
>> Here is new configuration for virtio-net PMD.
>>   - CONFIG_RTE_VIRTIO_VDEV_QTEST
>> To use this mode, EAL needs map all hugepages as one file. Also the file
>> should be mapped between (1 << 31) and (1 << 44). And start address
>> should be aligned by EAL memory size.
>>
>> To allocate like above, use below options.
>>   --single-file
>>   --range-virtaddr=0x80000000-0x100000000000
>>   --align-memsize
>> If a free regions isn't found, EAL will return error.
>>
>> To prepare virtio-net device on host, the users need to invoke QEMU
>> process in special qtest mode. This mode is mainly used for testing QEMU
>> devices from outer process. In this mode, no guest runs.
>> Here is QEMU command line.
>>
>>   $ qemu-system-x86_64 \
>>       -machine pc-i440fx-1.4,accel=qtest \
>>       -display none -qtest-log /dev/null \
>>       -qtest unix:/tmp/socket,server \
>>       -netdev type=tap,script=/etc/qemu-ifup,id=net0,queues=1 \
>>       -device
>> virtio-net-pci,netdev=net0,mq=on,disable-modern=false,addr=3 \
>>       -chardev socket,id=chr1,path=/tmp/ivshmem,server \
>>       -device ivshmem,size=1G,chardev=chr1,vectors=1,addr=4
>>
>>   * Should use qemu-2.5.1, or above.
>>   * QEMU process is needed per port.
>>   * virtio-1.0 device are only supported.
>>   * The vhost backends like vhost-net and vhost-user can be specified.
>>   * In most cases, just using above command is enough, but you can also
>>     specify other QEMU virtio-net options.
>>   * Only checked "pc-i440fx-1.4" machine, but may work with other
>>     machines.
>>   * Should not add "--enable-kvm" to QEMU command line.
>
> Correct me if wrong: all control msgs go through qemu process, e.g.,
> tx notifications and rx interrupts need follow frontend-qemu-backend
> path. Question: qemu is started without --enable-kvm, as I understand,
> ioeventfd, the basis of kickfd/callfd, will not be available. So how
> does qemu kick backend or be kicked by backend?

Actually, vhost-backend process will receive kickfd and callfd as -1.
(Currently, we have a bug in librte_vhost, because the library treats -1
as "not initialized state". But actually without "--enable-kvm", -1 will
be set by qemu to initialize kickfd and callfd. I will send a patch for
the issue with next patch series.)

In our case, virtio-net driver and vhost-backend driver are PMD. So we
don't use kickfd and callfd, right?

If you worried about vhost-net case, vhost-net kernel thread will work
without ioeventfd and irqfd.
In this case, virtio-net PMD can kick the vhost-net by accessing
VIRTIO_PCI_QUEUE_NOTIFY register.
(vhost-net doesn't need to kick virtio-net driver, because the driver is
PMD.)

>
>>
>> After invoking QEMU, the PMD can connect to QEMU process using unix
>> domain sockets. Over these sockets, virtio-net, ivshmem and piix3
>> device in QEMU are probed by the PMD.
>> Here is example of command line.
>>
>>   $ testpmd -c f -n 1 -m 1024 --no-pci --single-file \
>>        --range-virtaddr=0x80000000-0x100000000000 --align-memsize \
>>       
>> --vdev="eth_qtest_virtio0,qtest=/tmp/socket,ivshmem=/tmp/ivshmem"\
>>        -- --disable-hw-vlan --txqflags=0xf00 -i
>>
>> Please specify same unix domain sockets and memory size in both QEMU
>> and DPDK command lines like above.
>> The share memory size should be power of 2, because ivshmem only
>> accepts such memory size.
>>
>> Signed-off-by: Tetsuya Mukawa <mukawa@igel.co.jp>
>> ---
>>   config/common_linuxapp             |    1 +
>>   drivers/net/virtio/Makefile        |    4 +
>>   drivers/net/virtio/qtest.c         | 1342
>> ++++++++++++++++++++++++++++++++++++
>>   drivers/net/virtio/qtest.h         |   65 ++
>>   drivers/net/virtio/virtio_ethdev.c |  383 +++++++++-
>>   drivers/net/virtio/virtio_pci.c    |  364 +++++++++-
>>   drivers/net/virtio/virtio_pci.h    |    5 +-
>>   7 files changed, 2122 insertions(+), 42 deletions(-)
>>   create mode 100644 drivers/net/virtio/qtest.c
>>   create mode 100644 drivers/net/virtio/qtest.h
>>
>> diff --git a/config/common_linuxapp b/config/common_linuxapp
>> index 452f39c..f6e53bc 100644
>> --- a/config/common_linuxapp
>> +++ b/config/common_linuxapp
>> @@ -533,3 +533,4 @@ CONFIG_RTE_TEST_PMD_RECORD_BURST_STATS=n
>>   # Enable virtio support for container
>>   #
>>   CONFIG_RTE_VIRTIO_VDEV=y
>> +CONFIG_RTE_VIRTIO_VDEV_QTEST=y
>> diff --git a/drivers/net/virtio/Makefile b/drivers/net/virtio/Makefile
>> index ef920f9..6c11378 100644
>> --- a/drivers/net/virtio/Makefile
>> +++ b/drivers/net/virtio/Makefile
>> @@ -56,6 +56,10 @@ ifeq ($(CONFIG_RTE_VIRTIO_VDEV),y)
>>       SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += vhost_embedded.c
>>   endif
>>   +ifeq ($(CONFIG_RTE_VIRTIO_VDEV_QTEST),y)
>> +    SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += qtest.c
>> +endif
>> +
>>   # this lib depends upon:
>>   DEPDIRS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += lib/librte_eal
>> lib/librte_ether
>>   DEPDIRS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += lib/librte_mempool
>> lib/librte_mbuf
>> diff --git a/drivers/net/virtio/qtest.c b/drivers/net/virtio/qtest.c
>> new file mode 100644
>> index 0000000..061aab5
>> --- /dev/null
>> +++ b/drivers/net/virtio/qtest.c
>> @@ -0,0 +1,1342 @@
>> +/*-
>> + *   BSD LICENSE
>> + *
>> + *   Copyright(c) 2016 IGEL Co., Ltd. All rights reserved.
>> + *   All rights reserved.
>> + *
>> + *   Redistribution and use in source and binary forms, with or without
>> + *   modification, are permitted provided that the following conditions
>> + *   are met:
>> + *
>> + *     * Redistributions of source code must retain the above copyright
>> + *       notice, this list of conditions and the following disclaimer.
>> + *     * Redistributions in binary form must reproduce the above
>> copyright
>> + *       notice, this list of conditions and the following
>> disclaimer in
>> + *       the documentation and/or other materials provided with the
>> + *       distribution.
>> + *     * Neither the name of IGEL Co., Ltd. nor the names of its
>> + *       contributors may be used to endorse or promote products
>> derived
>> + *       from this software without specific prior written permission.
>> + *
>> + *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
>> CONTRIBUTORS
>> + *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
>> + *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
>> FITNESS FOR
>> + *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
>> COPYRIGHT
>> + *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
>> INCIDENTAL,
>> + *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
>> + *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
>> OF USE,
>> + *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
>> ON ANY
>> + *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
>> TORT
>> + *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
>> THE USE
>> + *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
>> DAMAGE.
>> + */
>> +#include <stdint.h>
>> +#include <stdlib.h>
>> +#include <string.h>
>> +#include <unistd.h>
>> +#include <sys/types.h>
>> +#include <sys/socket.h>
>> +#include <sys/un.h>
>> +#include <sys/queue.h>
>> +#include <signal.h>
>> +#include <pthread.h>
>> +#include <sys/stat.h>
>> +#include <fcntl.h>
>> +#include <sys/eventfd.h>
>> +#include <linux/pci_regs.h>
>> +
>> +#include <rte_memory.h>
>> +#include <rte_malloc.h>
>> +#include <rte_common.h>
>> +#include <rte_interrupts.h>
>> +
>> +#include "virtio_pci.h"
>> +#include "virtio_logs.h"
>> +#include "virtio_ethdev.h"
>> +#include "qtest.h"
>> +
>> +#define NB_BAR                          6
>> +
>> +/* PIIX3 configuration registers */
>> +#define PIIX3_REG_ADDR_PIRQA            0x60
>> +#define PIIX3_REG_ADDR_PIRQB            0x61
>> +#define PIIX3_REG_ADDR_PIRQC            0x62
>> +#define PIIX3_REG_ADDR_PIRQD            0x63
>> +
>> +/* Device information */
>> +#define VIRTIO_NET_DEVICE_ID            0x1000
>> +#define VIRTIO_NET_VENDOR_ID            0x1af4
>> +#define VIRTIO_NET_IRQ_NUM              10
>> +#define IVSHMEM_DEVICE_ID               0x1110
>> +#define IVSHMEM_VENDOR_ID               0x1af4
>> +#define IVSHMEM_PROTOCOL_VERSION        0
>> +#define PIIX3_DEVICE_ID                 0x7000
>> +#define PIIX3_VENDOR_ID                 0x8086
>> +
>> +/* ------------------------------------------------------------
>> + * IO port mapping of qtest guest
>> + * ------------------------------------------------------------
>> + * 0x0000 - 0xbfff : not used
>> + * 0xc000 - 0xc03f : virtio-net(BAR0)
>> + * 0xc040 - 0xffff : not used
>> + *
>> + * ------------------------------------------------------------
>> + * Memory mapping of qtest quest
>> + * ------------------------------------------------------------
>> + * 0x00000000_00000000 - 0x00000000_3fffffff : not used
>> + * 0x00000000_40000000 - 0x00000000_40000fff : virtio-net(BAR1)
>> + * 0x00000000_40001000 - 0x00000000_40ffffff : not used
>> + * 0x00000000_41000000 - 0x00000000_417fffff : virtio-net(BAR4)
>> + * 0x00000000_41800000 - 0x00000000_41ffffff : not used
>> + * 0x00000000_42000000 - 0x00000000_420000ff : ivshmem(BAR0)
>> + * 0x00000000_42000100 - 0x00000000_42ffffff : not used
>> + * 0x00000000_80000000 - 0xffffffff_ffffffff : ivshmem(BAR2)
>> + *
>
> Is it possible to arrange multiple virtio-net devices here? What's the
> challenges?

Yes, you can manage multiple virtio-net devices here, if you define
correct memory map.

>
> Seems that lots of below code do the same work as libqos. So can we
> just link libqos? Or we need to maintain this code.

Problem is libqos will be GPL.
So I wrote the code from scratch.

>
>> + * We can only specify start address of a region. The region size
>> + * will be defined by the device implementation in QEMU.
>> + * The size will be pow of 2 according to the PCI specification.
>> + * Also, the region start address should be aligned by region size.
>> + *
>> + * BAR2 of ivshmem will be used to mmap DPDK application memory.
>> + * So this address will be dynamically changed, but not to overlap
>> + * others, it should be mmaped between above addresses. Such allocation
>> + * is done by EAL. Check rte_eal_get_free_region() also.
>> + */
>> +#define VIRTIO_NET_IO_START             0xc000
>> +#define VIRTIO_NET_MEMORY1_START    0x40000000
>> +#define VIRTIO_NET_MEMORY2_START    0x41000000
>> +#define IVSHMEM_MEMORY_START            0x42000000
>> +
>> +#define PCI_CONFIG_ADDR(_bus, _device, _function, _offset) ( \
>> +    (1 << 31) | ((_bus) & 0xff) << 16 | ((_device) & 0x1f) << 11 | \
>> +    ((_function) & 0x7) << 8 | ((_offset) & 0xfc))
>> +
>> +static char interrupt_message[32];
>> +
>> +enum qtest_pci_bar_type {
>> +    QTEST_PCI_BAR_DISABLE = 0,
>> +    QTEST_PCI_BAR_IO,
>> +    QTEST_PCI_BAR_MEMORY_UNDER_1MB,
>> +    QTEST_PCI_BAR_MEMORY_32,
>> +    QTEST_PCI_BAR_MEMORY_64
>> +};
>> +
>> +struct qtest_pci_bar {
>> +    enum qtest_pci_bar_type type;
>> +    uint8_t addr;
>> +    uint64_t region_start;
>> +    uint64_t region_size;
>> +};
>> +
>> +struct qtest_session;
>> +TAILQ_HEAD(qtest_pci_device_list, qtest_pci_device);
>> +struct qtest_pci_device {
>> +    TAILQ_ENTRY(qtest_pci_device) next;
>> +    const char *name;
>> +    uint16_t device_id;
>> +    uint16_t vendor_id;
>> +    uint8_t bus_addr;
>> +    uint8_t device_addr;
>> +    struct qtest_pci_bar bar[NB_BAR];
>> +    int (*init)(struct qtest_session *s, struct qtest_pci_device *dev);
>> +};
>> +
>> +union qtest_pipefds {
>> +    struct {
>> +        int pipefd[2];
>> +    };
>> +    struct {
>> +        int readfd;
>> +        int writefd;
>> +    };
>> +};
>> +
>> +struct qtest_session {
>> +    int qtest_socket;
>> +    pthread_mutex_t qtest_session_lock;
>> +
>> +    struct qtest_pci_device_list head;
>> +    int ivshmem_socket;
>> +
>> +    pthread_t event_th;
>> +    char *evq;
>> +    char *evq_dequeue_ptr;
>> +    size_t evq_total_len;
>> +
>> +    union qtest_pipefds msgfds;
>> +
>> +    pthread_t intr_th;
>> +    int eventfd;
>> +    rte_atomic16_t enable_intr;
>> +    rte_intr_callback_fn cb;
>> +    void *cb_arg;
>> +    struct rte_eth_dev_data *eth_data;
>> +};
>> +
>> +static int
>> +qtest_raw_send(int fd, char *buf, size_t count)
>> +{
>> +    size_t len = count;
>> +    size_t total_len = 0;
>> +    int ret = 0;
>> +
>> +    while (len > 0) {
>> +        ret = write(fd, buf, len);
>> +        if (ret == -1) {
>> +            if (errno == EINTR)
>> +                continue;
>> +            return ret;
>> +        }
>> +        if (ret == (int)len)
>> +            break;
>
> This _if_ complicates this function, remove this and rely on while
> condition to quit? Same as in the below qtest_raw_recv().
>

Yes, I will remove it.

Thanks,
Tetsuya

> Thanks,
> Jianfeng
>
>> +        total_len += ret;
>> +        buf += ret;
>> +        len -= ret;
>> +    }
>> +    return total_len + ret;
>> +}
>> +
>> +static int
>> +qtest_raw_recv(int fd, char *buf, size_t count)
>> +{
>> +    size_t len = count;
>> +    size_t total_len = 0;
>> +    int ret = 0;
>> +
>> +    while (len > 0) {
>> +        ret = read(fd, buf, len);
>> +        if (ret <= 0) {
>> +            if (errno == EINTR) {
>> +                continue;
>> +            }
>> +            return ret;
>> +        }
>> +        if (ret == (int)len)
>> +            break;
>> +        if (*(buf + ret - 1) == '\n')
>> +            break;
>> +        total_len += ret;
>> +        buf += ret;
>> +        len -= ret;
>> +    }
>> +    return total_len + ret;
>> +}
>> +
> [...]

^ permalink raw reply	[flat|nested] 120+ messages in thread

* Re: [PATCH v3 5/6] virtio: Add support for qtest virtio-net PMD
  2016-03-04  5:05           ` Tetsuya Mukawa
@ 2016-03-04  6:10             ` Tan, Jianfeng
  2016-03-04  9:53               ` Tetsuya Mukawa
  0 siblings, 1 reply; 120+ messages in thread
From: Tan, Jianfeng @ 2016-03-04  6:10 UTC (permalink / raw)
  To: Tetsuya Mukawa, dev

Hi Tetsuya,

On 3/4/2016 1:05 PM, Tetsuya Mukawa wrote:
> On 2016/03/04 11:18, Tan, Jianfeng wrote:
>> Hi Tetsuya,
>>
>> Seems that this patch is too long. Is it possible to split into
>> multiple commits?
> Hi Jianfeng,
>
> Sure, will do.
>
>> On 2/22/2016 4:17 PM, Tetsuya Mukawa wrote:
>>> The patch adds a new virtio-net PMD configuration that allows the PMD to
>>> work on host as if the PMD is in VM.
>>> Here is new configuration for virtio-net PMD.
>>>    - CONFIG_RTE_VIRTIO_VDEV_QTEST
>>> To use this mode, EAL needs map all hugepages as one file. Also the file
>>> should be mapped between (1 << 31) and (1 << 44). And start address
>>> should be aligned by EAL memory size.
>>>
>>> To allocate like above, use below options.
>>>    --single-file
>>>    --range-virtaddr=0x80000000-0x100000000000
>>>    --align-memsize
>>> If a free regions isn't found, EAL will return error.
>>>
>>> To prepare virtio-net device on host, the users need to invoke QEMU
>>> process in special qtest mode. This mode is mainly used for testing QEMU
>>> devices from outer process. In this mode, no guest runs.
>>> Here is QEMU command line.
>>>
>>>    $ qemu-system-x86_64 \
>>>        -machine pc-i440fx-1.4,accel=qtest \
>>>        -display none -qtest-log /dev/null \
>>>        -qtest unix:/tmp/socket,server \
>>>        -netdev type=tap,script=/etc/qemu-ifup,id=net0,queues=1 \
>>>        -device
>>> virtio-net-pci,netdev=net0,mq=on,disable-modern=false,addr=3 \
>>>        -chardev socket,id=chr1,path=/tmp/ivshmem,server \
>>>        -device ivshmem,size=1G,chardev=chr1,vectors=1,addr=4
>>>
>>>    * Should use qemu-2.5.1, or above.
>>>    * QEMU process is needed per port.
>>>    * virtio-1.0 device are only supported.
>>>    * The vhost backends like vhost-net and vhost-user can be specified.
>>>    * In most cases, just using above command is enough, but you can also
>>>      specify other QEMU virtio-net options.
>>>    * Only checked "pc-i440fx-1.4" machine, but may work with other
>>>      machines.
>>>    * Should not add "--enable-kvm" to QEMU command line.
>> Correct me if wrong: all control msgs go through qemu process, e.g.,
>> tx notifications and rx interrupts need follow frontend-qemu-backend
>> path. Question: qemu is started without --enable-kvm, as I understand,
>> ioeventfd, the basis of kickfd/callfd, will not be available. So how
>> does qemu kick backend or be kicked by backend?
> Actually, vhost-backend process will receive kickfd and callfd as -1.
> (Currently, we have a bug in librte_vhost, because the library treats -1
> as "not initialized state". But actually without "--enable-kvm", -1 will
> be set by qemu to initialize kickfd and callfd. I will send a patch for
> the issue with next patch series.)

Yes, we noticed the problem too: librte_vhost judges virtio_is_ready by 
whether both fds are set. But except that, what's kernel's way to do the 
judgement? In addition, it would be better to be a independent fix patch.

>
> In our case, virtio-net driver and vhost-backend driver are PMD. So we
> don't use kickfd and callfd, right?
>
> If you worried about vhost-net case, vhost-net kernel thread will work
> without ioeventfd and irqfd.
> In this case, virtio-net PMD can kick the vhost-net by accessing
> VIRTIO_PCI_QUEUE_NOTIFY register.
> (vhost-net doesn't need to kick virtio-net driver, because the driver is
> PMD.)

I ask this question because I think interrupt mode will help the 
scalability. Return to your solution, by accessing 
VIRTIO_PCI_QUEUE_NOTIFY register, virtio-net PMD can only wake up qemu, 
but how does qemu wakes up vhost-net under the case that kickfd = callfd 
= -1.

>
>>> After invoking QEMU, the PMD can connect to QEMU process using unix
>>> domain sockets. Over these sockets, virtio-net, ivshmem and piix3
>>> device in QEMU are probed by the PMD.
>>> Here is example of command line.
>>>
>>>    $ testpmd -c f -n 1 -m 1024 --no-pci --single-file \
>>>         --range-virtaddr=0x80000000-0x100000000000 --align-memsize \
>>>        
>>> --vdev="eth_qtest_virtio0,qtest=/tmp/socket,ivshmem=/tmp/ivshmem"\
>>>         -- --disable-hw-vlan --txqflags=0xf00 -i
>>>
>>> Please specify same unix domain sockets and memory size in both QEMU
>>> and DPDK command lines like above.
>>> The share memory size should be power of 2, because ivshmem only
>>> accepts such memory size.
>>>
>>> Signed-off-by: Tetsuya Mukawa <mukawa@igel.co.jp>
>>> ---
>>>    config/common_linuxapp             |    1 +
>>>    drivers/net/virtio/Makefile        |    4 +
>>>    drivers/net/virtio/qtest.c         | 1342
>>> ++++++++++++++++++++++++++++++++++++
>>>    drivers/net/virtio/qtest.h         |   65 ++
>>>    drivers/net/virtio/virtio_ethdev.c |  383 +++++++++-
>>>    drivers/net/virtio/virtio_pci.c    |  364 +++++++++-
>>>    drivers/net/virtio/virtio_pci.h    |    5 +-
>>>    7 files changed, 2122 insertions(+), 42 deletions(-)
>>>    create mode 100644 drivers/net/virtio/qtest.c
>>>    create mode 100644 drivers/net/virtio/qtest.h
>>>
>>> diff --git a/config/common_linuxapp b/config/common_linuxapp
>>> index 452f39c..f6e53bc 100644
>>> --- a/config/common_linuxapp
>>> +++ b/config/common_linuxapp
>>> @@ -533,3 +533,4 @@ CONFIG_RTE_TEST_PMD_RECORD_BURST_STATS=n
>>>    # Enable virtio support for container
>>>    #
>>>    CONFIG_RTE_VIRTIO_VDEV=y
>>> +CONFIG_RTE_VIRTIO_VDEV_QTEST=y
>>> diff --git a/drivers/net/virtio/Makefile b/drivers/net/virtio/Makefile
>>> index ef920f9..6c11378 100644
>>> --- a/drivers/net/virtio/Makefile
>>> +++ b/drivers/net/virtio/Makefile
>>> @@ -56,6 +56,10 @@ ifeq ($(CONFIG_RTE_VIRTIO_VDEV),y)
>>>        SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += vhost_embedded.c
>>>    endif
>>>    +ifeq ($(CONFIG_RTE_VIRTIO_VDEV_QTEST),y)
>>> +    SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += qtest.c
>>> +endif
>>> +
>>>    # this lib depends upon:
>>>    DEPDIRS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += lib/librte_eal
>>> lib/librte_ether
>>>    DEPDIRS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += lib/librte_mempool
>>> lib/librte_mbuf
>>> diff --git a/drivers/net/virtio/qtest.c b/drivers/net/virtio/qtest.c
>>> new file mode 100644
>>> index 0000000..061aab5
>>> --- /dev/null
>>> +++ b/drivers/net/virtio/qtest.c
>>> @@ -0,0 +1,1342 @@
>>> +/*-
>>> + *   BSD LICENSE
>>> + *
>>> + *   Copyright(c) 2016 IGEL Co., Ltd. All rights reserved.
>>> + *   All rights reserved.
>>> + *
>>> + *   Redistribution and use in source and binary forms, with or without
>>> + *   modification, are permitted provided that the following conditions
>>> + *   are met:
>>> + *
>>> + *     * Redistributions of source code must retain the above copyright
>>> + *       notice, this list of conditions and the following disclaimer.
>>> + *     * Redistributions in binary form must reproduce the above
>>> copyright
>>> + *       notice, this list of conditions and the following
>>> disclaimer in
>>> + *       the documentation and/or other materials provided with the
>>> + *       distribution.
>>> + *     * Neither the name of IGEL Co., Ltd. nor the names of its
>>> + *       contributors may be used to endorse or promote products
>>> derived
>>> + *       from this software without specific prior written permission.
>>> + *
>>> + *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
>>> CONTRIBUTORS
>>> + *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
>>> + *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
>>> FITNESS FOR
>>> + *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
>>> COPYRIGHT
>>> + *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
>>> INCIDENTAL,
>>> + *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
>>> + *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
>>> OF USE,
>>> + *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
>>> ON ANY
>>> + *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
>>> TORT
>>> + *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
>>> THE USE
>>> + *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
>>> DAMAGE.
>>> + */
>>> +#include <stdint.h>
>>> +#include <stdlib.h>
>>> +#include <string.h>
>>> +#include <unistd.h>
>>> +#include <sys/types.h>
>>> +#include <sys/socket.h>
>>> +#include <sys/un.h>
>>> +#include <sys/queue.h>
>>> +#include <signal.h>
>>> +#include <pthread.h>
>>> +#include <sys/stat.h>
>>> +#include <fcntl.h>
>>> +#include <sys/eventfd.h>
>>> +#include <linux/pci_regs.h>
>>> +
>>> +#include <rte_memory.h>
>>> +#include <rte_malloc.h>
>>> +#include <rte_common.h>
>>> +#include <rte_interrupts.h>
>>> +
>>> +#include "virtio_pci.h"
>>> +#include "virtio_logs.h"
>>> +#include "virtio_ethdev.h"
>>> +#include "qtest.h"
>>> +
>>> +#define NB_BAR                          6
>>> +
>>> +/* PIIX3 configuration registers */
>>> +#define PIIX3_REG_ADDR_PIRQA            0x60
>>> +#define PIIX3_REG_ADDR_PIRQB            0x61
>>> +#define PIIX3_REG_ADDR_PIRQC            0x62
>>> +#define PIIX3_REG_ADDR_PIRQD            0x63
>>> +
>>> +/* Device information */
>>> +#define VIRTIO_NET_DEVICE_ID            0x1000
>>> +#define VIRTIO_NET_VENDOR_ID            0x1af4
>>> +#define VIRTIO_NET_IRQ_NUM              10
>>> +#define IVSHMEM_DEVICE_ID               0x1110
>>> +#define IVSHMEM_VENDOR_ID               0x1af4
>>> +#define IVSHMEM_PROTOCOL_VERSION        0
>>> +#define PIIX3_DEVICE_ID                 0x7000
>>> +#define PIIX3_VENDOR_ID                 0x8086
>>> +
>>> +/* ------------------------------------------------------------
>>> + * IO port mapping of qtest guest
>>> + * ------------------------------------------------------------
>>> + * 0x0000 - 0xbfff : not used
>>> + * 0xc000 - 0xc03f : virtio-net(BAR0)
>>> + * 0xc040 - 0xffff : not used
>>> + *
>>> + * ------------------------------------------------------------
>>> + * Memory mapping of qtest quest
>>> + * ------------------------------------------------------------
>>> + * 0x00000000_00000000 - 0x00000000_3fffffff : not used
>>> + * 0x00000000_40000000 - 0x00000000_40000fff : virtio-net(BAR1)
>>> + * 0x00000000_40001000 - 0x00000000_40ffffff : not used
>>> + * 0x00000000_41000000 - 0x00000000_417fffff : virtio-net(BAR4)
>>> + * 0x00000000_41800000 - 0x00000000_41ffffff : not used
>>> + * 0x00000000_42000000 - 0x00000000_420000ff : ivshmem(BAR0)
>>> + * 0x00000000_42000100 - 0x00000000_42ffffff : not used
>>> + * 0x00000000_80000000 - 0xffffffff_ffffffff : ivshmem(BAR2)
>>> + *
>> Is it possible to arrange multiple virtio-net devices here? What's the
>> challenges?
> Yes, you can manage multiple virtio-net devices here, if you define
> correct memory map.

So this memory map will be decided here or in qemu?

>
>> Seems that lots of below code do the same work as libqos. So can we
>> just link libqos? Or we need to maintain this code.
> Problem is libqos will be GPL.
> So I wrote the code from scratch.

OK, great! To make it extensible for other virtio devices (like scsi), 
we need to abstract those code as a qtest utils, and virtio-net related 
code into other .c file.

^ permalink raw reply	[flat|nested] 120+ messages in thread

* Re: [PATCH v3 5/6] virtio: Add support for qtest virtio-net PMD
  2016-03-04  6:10             ` Tan, Jianfeng
@ 2016-03-04  9:53               ` Tetsuya Mukawa
  0 siblings, 0 replies; 120+ messages in thread
From: Tetsuya Mukawa @ 2016-03-04  9:53 UTC (permalink / raw)
  To: Tan, Jianfeng, dev

On 2016/03/04 15:10, Tan, Jianfeng wrote:
> Hi Tetsuya,
>
> On 3/4/2016 1:05 PM, Tetsuya Mukawa wrote:
>> On 2016/03/04 11:18, Tan, Jianfeng wrote:
>>> Hi Tetsuya,
>>>
>>> Seems that this patch is too long. Is it possible to split into
>>> multiple commits?
>> Hi Jianfeng,
>>
>> Sure, will do.
>>
>>> On 2/22/2016 4:17 PM, Tetsuya Mukawa wrote:
>>>> The patch adds a new virtio-net PMD configuration that allows the
>>>> PMD to
>>>> work on host as if the PMD is in VM.
>>>> Here is new configuration for virtio-net PMD.
>>>>    - CONFIG_RTE_VIRTIO_VDEV_QTEST
>>>> To use this mode, EAL needs map all hugepages as one file. Also the
>>>> file
>>>> should be mapped between (1 << 31) and (1 << 44). And start address
>>>> should be aligned by EAL memory size.
>>>>
>>>> To allocate like above, use below options.
>>>>    --single-file
>>>>    --range-virtaddr=0x80000000-0x100000000000
>>>>    --align-memsize
>>>> If a free regions isn't found, EAL will return error.
>>>>
>>>> To prepare virtio-net device on host, the users need to invoke QEMU
>>>> process in special qtest mode. This mode is mainly used for testing
>>>> QEMU
>>>> devices from outer process. In this mode, no guest runs.
>>>> Here is QEMU command line.
>>>>
>>>>    $ qemu-system-x86_64 \
>>>>        -machine pc-i440fx-1.4,accel=qtest \
>>>>        -display none -qtest-log /dev/null \
>>>>        -qtest unix:/tmp/socket,server \
>>>>        -netdev type=tap,script=/etc/qemu-ifup,id=net0,queues=1 \
>>>>        -device
>>>> virtio-net-pci,netdev=net0,mq=on,disable-modern=false,addr=3 \
>>>>        -chardev socket,id=chr1,path=/tmp/ivshmem,server \
>>>>        -device ivshmem,size=1G,chardev=chr1,vectors=1,addr=4
>>>>
>>>>    * Should use qemu-2.5.1, or above.
>>>>    * QEMU process is needed per port.
>>>>    * virtio-1.0 device are only supported.
>>>>    * The vhost backends like vhost-net and vhost-user can be
>>>> specified.
>>>>    * In most cases, just using above command is enough, but you can
>>>> also
>>>>      specify other QEMU virtio-net options.
>>>>    * Only checked "pc-i440fx-1.4" machine, but may work with other
>>>>      machines.
>>>>    * Should not add "--enable-kvm" to QEMU command line.
>>> Correct me if wrong: all control msgs go through qemu process, e.g.,
>>> tx notifications and rx interrupts need follow frontend-qemu-backend
>>> path. Question: qemu is started without --enable-kvm, as I understand,
>>> ioeventfd, the basis of kickfd/callfd, will not be available. So how
>>> does qemu kick backend or be kicked by backend?
>> Actually, vhost-backend process will receive kickfd and callfd as -1.
>> (Currently, we have a bug in librte_vhost, because the library treats -1
>> as "not initialized state". But actually without "--enable-kvm", -1 will
>> be set by qemu to initialize kickfd and callfd. I will send a patch for
>> the issue with next patch series.)
>
> Yes, we noticed the problem too: librte_vhost judges virtio_is_ready
> by whether both fds are set. But except that, what's kernel's way to
> do the judgement? In addition, it would be better to be a independent
> fix patch.
>

Even if fd is -1, "VHOST_SET_VRING_KICK/CALL" will be issued.
So we can know correct timing.

I have tested current QEMU, and reviewed kernel code to know how kernel
treats it.
And I've fount below.

 - vhost-user backend case.
 "-1" will be set as kickfd and callfd.
 - kernel vhost-net backend case.
 Actually eventfd will be set as kickfd and callfd.

So if backend is vhost-net, vhost-net will use eventfd.

>>
>> In our case, virtio-net driver and vhost-backend driver are PMD. So we
>> don't use kickfd and callfd, right?
>>
>> If you worried about vhost-net case, vhost-net kernel thread will work
>> without ioeventfd and irqfd.
>> In this case, virtio-net PMD can kick the vhost-net by accessing
>> VIRTIO_PCI_QUEUE_NOTIFY register.
>> (vhost-net doesn't need to kick virtio-net driver, because the driver is
>> PMD.)
>
> I ask this question because I think interrupt mode will help the
> scalability. Return to your solution, by accessing
> VIRTIO_PCI_QUEUE_NOTIFY register, virtio-net PMD can only wake up
> qemu, but how does qemu wakes up vhost-net under the case that kickfd
> = callfd = -1.
>

Answer is above.
It seems QEMU will set eventfd for vhost-net case.
Only with vhost-user backend case, QEMU sends "-1" as kickfd and callfd.

>>>> After invoking QEMU, the PMD can connect to QEMU process using unix
>>>> domain sockets. Over these sockets, virtio-net, ivshmem and piix3
>>>> device in QEMU are probed by the PMD.
>>>> Here is example of command line.
>>>>
>>>>    $ testpmd -c f -n 1 -m 1024 --no-pci --single-file \
>>>>         --range-virtaddr=0x80000000-0x100000000000 --align-memsize \
>>>>       
>>>> --vdev="eth_qtest_virtio0,qtest=/tmp/socket,ivshmem=/tmp/ivshmem"\
>>>>         -- --disable-hw-vlan --txqflags=0xf00 -i
>>>>
>>>> Please specify same unix domain sockets and memory size in both QEMU
>>>> and DPDK command lines like above.
>>>> The share memory size should be power of 2, because ivshmem only
>>>> accepts such memory size.
>>>>
>>>> Signed-off-by: Tetsuya Mukawa <mukawa@igel.co.jp>
>>>> ---
>>>>    config/common_linuxapp             |    1 +
>>>>    drivers/net/virtio/Makefile        |    4 +
>>>>    drivers/net/virtio/qtest.c         | 1342
>>>> ++++++++++++++++++++++++++++++++++++
>>>>    drivers/net/virtio/qtest.h         |   65 ++
>>>>    drivers/net/virtio/virtio_ethdev.c |  383 +++++++++-
>>>>    drivers/net/virtio/virtio_pci.c    |  364 +++++++++-
>>>>    drivers/net/virtio/virtio_pci.h    |    5 +-
>>>>    7 files changed, 2122 insertions(+), 42 deletions(-)
>>>>    create mode 100644 drivers/net/virtio/qtest.c
>>>>    create mode 100644 drivers/net/virtio/qtest.h
>>>>
>>>> diff --git a/config/common_linuxapp b/config/common_linuxapp
>>>> index 452f39c..f6e53bc 100644
>>>> --- a/config/common_linuxapp
>>>> +++ b/config/common_linuxapp
>>>> @@ -533,3 +533,4 @@ CONFIG_RTE_TEST_PMD_RECORD_BURST_STATS=n
>>>>    # Enable virtio support for container
>>>>    #
>>>>    CONFIG_RTE_VIRTIO_VDEV=y
>>>> +CONFIG_RTE_VIRTIO_VDEV_QTEST=y
>>>> diff --git a/drivers/net/virtio/Makefile b/drivers/net/virtio/Makefile
>>>> index ef920f9..6c11378 100644
>>>> --- a/drivers/net/virtio/Makefile
>>>> +++ b/drivers/net/virtio/Makefile
>>>> @@ -56,6 +56,10 @@ ifeq ($(CONFIG_RTE_VIRTIO_VDEV),y)
>>>>        SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += vhost_embedded.c
>>>>    endif
>>>>    +ifeq ($(CONFIG_RTE_VIRTIO_VDEV_QTEST),y)
>>>> +    SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += qtest.c
>>>> +endif
>>>> +
>>>>    # this lib depends upon:
>>>>    DEPDIRS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += lib/librte_eal
>>>> lib/librte_ether
>>>>    DEPDIRS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += lib/librte_mempool
>>>> lib/librte_mbuf
>>>> diff --git a/drivers/net/virtio/qtest.c b/drivers/net/virtio/qtest.c
>>>> new file mode 100644
>>>> index 0000000..061aab5
>>>> --- /dev/null
>>>> +++ b/drivers/net/virtio/qtest.c
>>>> @@ -0,0 +1,1342 @@
>>>> +/*-
>>>> + *   BSD LICENSE
>>>> + *
>>>> + *   Copyright(c) 2016 IGEL Co., Ltd. All rights reserved.
>>>> + *   All rights reserved.
>>>> + *
>>>> + *   Redistribution and use in source and binary forms, with or
>>>> without
>>>> + *   modification, are permitted provided that the following
>>>> conditions
>>>> + *   are met:
>>>> + *
>>>> + *     * Redistributions of source code must retain the above
>>>> copyright
>>>> + *       notice, this list of conditions and the following
>>>> disclaimer.
>>>> + *     * Redistributions in binary form must reproduce the above
>>>> copyright
>>>> + *       notice, this list of conditions and the following
>>>> disclaimer in
>>>> + *       the documentation and/or other materials provided with the
>>>> + *       distribution.
>>>> + *     * Neither the name of IGEL Co., Ltd. nor the names of its
>>>> + *       contributors may be used to endorse or promote products
>>>> derived
>>>> + *       from this software without specific prior written
>>>> permission.
>>>> + *
>>>> + *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
>>>> CONTRIBUTORS
>>>> + *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT
>>>> NOT
>>>> + *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
>>>> FITNESS FOR
>>>> + *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
>>>> COPYRIGHT
>>>> + *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
>>>> INCIDENTAL,
>>>> + *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
>>>> + *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
>>>> OF USE,
>>>> + *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
>>>> ON ANY
>>>> + *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
>>>> TORT
>>>> + *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
>>>> THE USE
>>>> + *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
>>>> DAMAGE.
>>>> + */
>>>> +#include <stdint.h>
>>>> +#include <stdlib.h>
>>>> +#include <string.h>
>>>> +#include <unistd.h>
>>>> +#include <sys/types.h>
>>>> +#include <sys/socket.h>
>>>> +#include <sys/un.h>
>>>> +#include <sys/queue.h>
>>>> +#include <signal.h>
>>>> +#include <pthread.h>
>>>> +#include <sys/stat.h>
>>>> +#include <fcntl.h>
>>>> +#include <sys/eventfd.h>
>>>> +#include <linux/pci_regs.h>
>>>> +
>>>> +#include <rte_memory.h>
>>>> +#include <rte_malloc.h>
>>>> +#include <rte_common.h>
>>>> +#include <rte_interrupts.h>
>>>> +
>>>> +#include "virtio_pci.h"
>>>> +#include "virtio_logs.h"
>>>> +#include "virtio_ethdev.h"
>>>> +#include "qtest.h"
>>>> +
>>>> +#define NB_BAR                          6
>>>> +
>>>> +/* PIIX3 configuration registers */
>>>> +#define PIIX3_REG_ADDR_PIRQA            0x60
>>>> +#define PIIX3_REG_ADDR_PIRQB            0x61
>>>> +#define PIIX3_REG_ADDR_PIRQC            0x62
>>>> +#define PIIX3_REG_ADDR_PIRQD            0x63
>>>> +
>>>> +/* Device information */
>>>> +#define VIRTIO_NET_DEVICE_ID            0x1000
>>>> +#define VIRTIO_NET_VENDOR_ID            0x1af4
>>>> +#define VIRTIO_NET_IRQ_NUM              10
>>>> +#define IVSHMEM_DEVICE_ID               0x1110
>>>> +#define IVSHMEM_VENDOR_ID               0x1af4
>>>> +#define IVSHMEM_PROTOCOL_VERSION        0
>>>> +#define PIIX3_DEVICE_ID                 0x7000
>>>> +#define PIIX3_VENDOR_ID                 0x8086
>>>> +
>>>> +/* ------------------------------------------------------------
>>>> + * IO port mapping of qtest guest
>>>> + * ------------------------------------------------------------
>>>> + * 0x0000 - 0xbfff : not used
>>>> + * 0xc000 - 0xc03f : virtio-net(BAR0)
>>>> + * 0xc040 - 0xffff : not used
>>>> + *
>>>> + * ------------------------------------------------------------
>>>> + * Memory mapping of qtest quest
>>>> + * ------------------------------------------------------------
>>>> + * 0x00000000_00000000 - 0x00000000_3fffffff : not used
>>>> + * 0x00000000_40000000 - 0x00000000_40000fff : virtio-net(BAR1)
>>>> + * 0x00000000_40001000 - 0x00000000_40ffffff : not used
>>>> + * 0x00000000_41000000 - 0x00000000_417fffff : virtio-net(BAR4)
>>>> + * 0x00000000_41800000 - 0x00000000_41ffffff : not used
>>>> + * 0x00000000_42000000 - 0x00000000_420000ff : ivshmem(BAR0)
>>>> + * 0x00000000_42000100 - 0x00000000_42ffffff : not used
>>>> + * 0x00000000_80000000 - 0xffffffff_ffffffff : ivshmem(BAR2)
>>>> + *
>>> Is it possible to arrange multiple virtio-net devices here? What's the
>>> challenges?
>> Yes, you can manage multiple virtio-net devices here, if you define
>> correct memory map.
>
> So this memory map will be decided here or in qemu?

Here, we can decide memory map of "all PCI devices".
And we will initialize them along with this memory mapping.

Actually some memory mapping is in QEMU also.
For example, first 1MB is allocated for firmware. We should avoid to
overlap it.
It seems this is decided in machine setting file.
Actually not well documented, but please see "hw/i386/pc.c" of QEMU code.

>
>>
>>> Seems that lots of below code do the same work as libqos. So can we
>>> just link libqos? Or we need to maintain this code.
>> Problem is libqos will be GPL.
>> So I wrote the code from scratch.
>
> OK, great! To make it extensible for other virtio devices (like scsi),
> we need to abstract those code as a qtest utils, and virtio-net
> related code into other .c file.
>

OK, while splitting my long patch, I will separate qtest utils from
virtio-net related code.
If someone uses this extension for other QEMU devices, actually it's
very interesting!

Thanks,
Tetsuya

^ permalink raw reply	[flat|nested] 120+ messages in thread

* [PATCH v4 00/12] Virtio-net PMD: QEMU QTest extension for container
  2016-02-22  8:17       ` [PATCH v3 3/6] EAL: Add new EAL "--range-virtaddr" option Tetsuya Mukawa
  2016-03-04  2:20         ` Tan, Jianfeng
@ 2016-03-09  8:33         ` Tetsuya Mukawa
  2016-03-09  8:33         ` [PATCH v4 01/12] virtio: Retrieve driver name from eth_dev Tetsuya Mukawa
                           ` (11 subsequent siblings)
  13 siblings, 0 replies; 120+ messages in thread
From: Tetsuya Mukawa @ 2016-03-09  8:33 UTC (permalink / raw)
  To: dev, jianfeng.tan, huawei.xie, yuanhan.liu

The patches will work on below patch series.
 - [PATCH v2 0/5] virtio support for container

[Changes]
v4 changes:
 - Rebase on latest master.
 - Split patches.
 - To abstract qtest code more, change interface between current virtio
   code and qtest code.
 - Rename qtest.c to qtest_utils.c
 - Change implementation like below.
   - Set pci device information out of qtest abstraction, then pass it to
     qtest to initialize devices.
 - Remove redundant condition checking from qtest_raw_send/recv().
 - Fix return value of qtest_raw_send().

v3 changes:
 - Rebase on latest master.
 - remove "-qtest-virtio" option, then add "--range-virtaddr" and
   "--align-memsize" options.
 - Fix typos in qtest.c

v2 changes:
 - Rebase on above patch seiries.
 - Rebase on master
 - Add "--qtest-virtio" EAL option.
 - Fixes in qtest.c
  - Fix error handling for the case qtest connection is closed.
  - Use eventfd for interrupt messaging.
  - Use linux header for PCI register definitions.
  - Fix qtest_raw_send/recv to handle error correctly.
  - Fix bit mask of PCI_CONFIG_ADDR.
  - Describe memory and ioport usage of qtest guest in qtest.c
  - Remove loop that is for finding PCI devices.


[Abstraction]

Normally, virtio-net PMD only works on VM, because there is no virtio-net device on host.
This patches extend  virtio-net PMD to be able to work on host as virtual PMD.
But we didn't implement virtio-net device as a part of virtio-net PMD.
To prepare virtio-net device for the PMD, start QEMU process with special QTest mode, then connect it from virtio-net PMD through unix domain socket.

The PMD can connect to anywhere QEMU virtio-net device can.
For example, the PMD can connects to vhost-net kernel module and vhost-user backend application.
Similar to virtio-net PMD on QEMU, application memory that uses virtio-net PMD will be shared between vhost backend application.
But vhost backend application memory will not be shared.

Main target of this PMD is container like docker, rkt, lxc and etc.
We can isolate related processes(virtio-net PMD process, QEMU and vhost-user backend process) by container.
But, to communicate through unix domain socket, shared directory will be needed.


[How to use]

 Please use QEMU-2.5.1, or above.
 (So far, QEMU-2.5.1 hasn't been released yet, so please checkout master from QEMU repository)

 - Compile
 Set "CONFIG_RTE_VIRTIO_VDEV_QTEST=y" in config/common_linux.
 Then compile it.

 - Start QEMU like below.
 $ qemu-system-x86_64 \
              -machine pc-i440fx-1.4,accel=qtest \
              -display none -qtest-log /dev/null \
              -qtest unix:/tmp/socket,server \
              -netdev type=tap,script=/etc/qemu-ifup,id=net0,queues=1 \
              -device virtio-net-pci,netdev=net0,mq=on,disable-modern=false,addr=3 \
              -chardev socket,id=chr1,path=/tmp/ivshmem,server \
              -device ivshmem,size=1G,chardev=chr1,vectors=1,addr=4

 - Start DPDK application like below
 $ testpmd -c f -n 1 -m 1024 --no-pci --single-file --qtest-virtio \
             --vdev="eth_qtest_virtio0,qtest=/tmp/socket,ivshmem=/tmp/ivshmem"\
             -- --disable-hw-vlan --txqflags=0xf00 -i

(*1) Please Specify same memory size in QEMU and DPDK command line.
(*2) Should use qemu-2.5.1, or above.
(*3) QEMU process is needed per port.
(*4) virtio-1.0 device are only supported.
(*5) The vhost backends like vhost-net and vhost-user can be specified.
(*6) In most cases, just using above command is enough, but you can also
     specify other QEMU virtio-net options.
(*7) Only checked "pc-i440fx-1.4" machine, but may work with other
     machines. It depends on a machine has piix3 south bridge.
     If the machine doesn't have, virtio-net PMD cannot receive status
     changed interrupts.
(*8) Should not add "--enable-kvm" to QEMU command line.


[Detailed Description]

 - virtio-net device implementation
The PMD uses QEMU virtio-net device. To do that, QEMU QTest functionality is used.
QTest is a test framework of QEMU devices. It allows us to implement a device driver outside of QEMU.
With QTest, we can implement DPDK application and virtio-net PMD as standalone process on host.
When QEMU is invoked as QTest mode, any guest code will not run.
To know more about QTest, see below.
http://wiki.qemu.org/Features/QTest

 - probing devices
QTest provides a unix domain socket. Through this socket, driver process can access to I/O port and memory of QEMU virtual machine.
The PMD will send I/O port accesses to probe pci devices.
If we can find virtio-net and ivshmem device, initialize the devices.
Also, I/O port accesses of virtio-net PMD will be sent through socket, and virtio-net PMD can initialize vitio-net device on QEMU correctly.

 - ivshmem device to share memory
To share memory that virtio-net PMD process uses, ivshmem device will be used.
Because ivshmem device can only handle one file descriptor, shared memory should be consist of one file.
To allocate such a memory, EAL has new option called "--single-file".
Also, the hugepages should be mapped between "1 << 31" to "1 << 44".
To map like above, EAL has one more new option called "-qtest-virtio".
While initializing ivshmem device, we can set BAR(Base Address Register).
It represents which memory QEMU vcpu can access to this shared memory.
We will specify host virtual address of shared memory as this address.
It is very useful because we don't need to apply patch to QEMU to calculate address offset.
(For example, if virtio-net PMD process will allocate memory from shared memory, then specify the virtual address of it to virtio-net register, QEMU virtio-net device can understand it without calculating address offset.)


Tetsuya Mukawa (12):
  virtio: Retrieve driver name from eth_dev
  vhost: Add a function to check virtio device type
  EAL: Add a new "--range-virtaddr" option
  EAL: Add a new "--align-memsize" option
  virtio,qtest: Add QTest utility basic functions
  virtio,qtest: Add pci device initialization function to qtest utils
  virtio,qtest: Add functionality to share memory between QTest guest
  virtio,qtest: Add functionality to handle interrupt
  virtio,qtest: Add misc functions to handle pci information
  virtio: Add QTest support to vtpci abstraction
  virtio: Add QTest support for virtio-net PMD
  docs: add release note for qtest virtio container support

 config/common_base                         |    1 +
 doc/guides/rel_notes/release_16_04.rst     |    3 +
 drivers/net/virtio/Makefile                |    4 +
 drivers/net/virtio/qtest.h                 |   94 +++
 drivers/net/virtio/qtest_utils.c           | 1223 ++++++++++++++++++++++++++++
 drivers/net/virtio/qtest_utils.h           |  355 ++++++++
 drivers/net/virtio/virtio_ethdev.c         |  509 +++++++++++-
 drivers/net/virtio/virtio_ethdev.h         |   32 +
 drivers/net/virtio/virtio_pci.c            |  368 ++++++++-
 drivers/net/virtio/virtio_pci.h            |    9 +-
 lib/librte_eal/common/eal_common_options.c |   17 +
 lib/librte_eal/common/eal_internal_cfg.h   |    3 +
 lib/librte_eal/common/eal_options.h        |    4 +
 lib/librte_eal/linuxapp/eal/eal.c          |   43 +
 lib/librte_eal/linuxapp/eal/eal_memory.c   |   91 ++-
 15 files changed, 2687 insertions(+), 69 deletions(-)
 create mode 100644 drivers/net/virtio/qtest.h
 create mode 100644 drivers/net/virtio/qtest_utils.c
 create mode 100644 drivers/net/virtio/qtest_utils.h

-- 
2.1.4

^ permalink raw reply	[flat|nested] 120+ messages in thread

* [PATCH v4 01/12] virtio: Retrieve driver name from eth_dev
  2016-02-22  8:17       ` [PATCH v3 3/6] EAL: Add new EAL "--range-virtaddr" option Tetsuya Mukawa
  2016-03-04  2:20         ` Tan, Jianfeng
  2016-03-09  8:33         ` [PATCH v4 00/12] Virtio-net PMD: QEMU QTest extension for container Tetsuya Mukawa
@ 2016-03-09  8:33         ` Tetsuya Mukawa
  2016-03-09  8:33         ` [PATCH v4 02/12] vhost: Add a function to check virtio device type Tetsuya Mukawa
                           ` (10 subsequent siblings)
  13 siblings, 0 replies; 120+ messages in thread
From: Tetsuya Mukawa @ 2016-03-09  8:33 UTC (permalink / raw)
  To: dev, jianfeng.tan, huawei.xie, yuanhan.liu

Currently, virtio_dev_info_get() retrieves driver name from pci_drv.
If the driver is virtual PMD, pci_drv will be invalid.
So retrieves the name from eth_dev.

Signed-off-by: Tetsuya Mukawa <mukawa@igel.co.jp>
---
 drivers/net/virtio/virtio_ethdev.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c
index bff1926..429377b 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -1438,7 +1438,7 @@ virtio_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
 {
 	struct virtio_hw *hw = dev->data->dev_private;
 
-	dev_info->driver_name = dev->driver->pci_drv.name;
+	dev_info->driver_name = dev->data->drv_name;
 	dev_info->max_rx_queues = (uint16_t)hw->max_rx_queues;
 	dev_info->max_tx_queues = (uint16_t)hw->max_tx_queues;
 	dev_info->min_rx_bufsize = VIRTIO_MIN_RX_BUFSIZE;
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 120+ messages in thread

* [PATCH v4 02/12] vhost: Add a function to check virtio device type
  2016-02-22  8:17       ` [PATCH v3 3/6] EAL: Add new EAL "--range-virtaddr" option Tetsuya Mukawa
                           ` (2 preceding siblings ...)
  2016-03-09  8:33         ` [PATCH v4 01/12] virtio: Retrieve driver name from eth_dev Tetsuya Mukawa
@ 2016-03-09  8:33         ` Tetsuya Mukawa
  2016-03-09  8:33         ` [PATCH v4 03/12] EAL: Add a new "--range-virtaddr" option Tetsuya Mukawa
                           ` (9 subsequent siblings)
  13 siblings, 0 replies; 120+ messages in thread
From: Tetsuya Mukawa @ 2016-03-09  8:33 UTC (permalink / raw)
  To: dev, jianfeng.tan, huawei.xie, yuanhan.liu

The patch adds below function to cleanup virtio code.
 - virtio_dev_check()

Signed-off-by: Tetsuya Mukawa <mukawa@igel.co.jp>
---
 drivers/net/virtio/virtio_ethdev.c | 52 ++++++++++++++++++--------------------
 drivers/net/virtio/virtio_ethdev.h | 32 +++++++++++++++++++++++
 2 files changed, 57 insertions(+), 27 deletions(-)

diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c
index 429377b..bc631c7 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -371,7 +371,7 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev,
 	vq->mz = mz;
 	vq->vq_ring_virt_mem = mz->addr;
 
-	if (dev->dev_type == RTE_ETH_DEV_PCI) {
+	if (virtio_dev_check(dev, RTE_ETH_DEV_PCI, NULL, 0)) {
 		vq->vq_ring_mem = mz->phys_addr;
 
 		/* Virtio PCI device VIRTIO_PCI_QUEUE_PF register is 32bit,
@@ -429,7 +429,7 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev,
 		vq->virtio_net_hdr_vaddr = mz->addr;
 		memset(vq->virtio_net_hdr_vaddr, 0, hdr_size);
 
-		if (dev->dev_type == RTE_ETH_DEV_PCI)
+		if (virtio_dev_check(dev, RTE_ETH_DEV_PCI, NULL, 0))
 			vq->virtio_net_hdr_mem = mz->phys_addr;
 #ifdef RTE_VIRTIO_VDEV
 		else
@@ -439,7 +439,7 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev,
 
 	hw->vtpci_ops->setup_queue(hw, vq);
 
-	if (dev->dev_type == RTE_ETH_DEV_PCI)
+	if (virtio_dev_check(dev, RTE_ETH_DEV_PCI, NULL, 0))
 		vq->offset = offsetof(struct rte_mbuf, buf_physaddr);
 #ifdef RTE_VIRTIO_VDEV
 	else
@@ -490,15 +490,13 @@ static void
 virtio_dev_close(struct rte_eth_dev *dev)
 {
 	struct virtio_hw *hw = dev->data->dev_private;
-	struct rte_pci_device *pci_dev = dev->pci_dev;
 
 	PMD_INIT_LOG(DEBUG, "virtio_dev_close");
 
 	/* reset the NIC */
-	if (dev->dev_type == RTE_ETH_DEV_PCI) {
-		if (pci_dev->driver->drv_flags & RTE_PCI_DRV_INTR_LSC)
-			vtpci_irq_config(hw, VIRTIO_MSI_NO_VECTOR);
-	}
+	if (virtio_dev_check(dev, RTE_ETH_DEV_PCI, NULL, RTE_PCI_DRV_INTR_LSC))
+		vtpci_irq_config(hw, VIRTIO_MSI_NO_VECTOR);
+
 	vtpci_reset(hw);
 	hw->started = 0;
 	virtio_dev_free_mbufs(dev);
@@ -1001,7 +999,7 @@ virtio_interrupt_handler(__rte_unused struct rte_intr_handle *handle,
 	isr = vtpci_isr(hw);
 	PMD_DRV_LOG(INFO, "interrupt status = %#x", isr);
 
-	if (dev->dev_type == RTE_ETH_DEV_PCI)
+	if (virtio_dev_check(dev, RTE_ETH_DEV_PCI, NULL, 0))
 		if (rte_intr_enable(&dev->pci_dev->intr_handle) < 0)
 			PMD_DRV_LOG(ERR, "interrupt enable failed");
 
@@ -1056,9 +1054,10 @@ eth_virtio_dev_init(struct rte_eth_dev *eth_dev)
 
 	pci_dev = eth_dev->pci_dev;
 
-	if (eth_dev->dev_type == RTE_ETH_DEV_PCI)
+	if (virtio_dev_check(eth_dev, RTE_ETH_DEV_PCI, NULL, 0)) {
 		if (vtpci_init(pci_dev, hw) < 0)
 			return -1;
+	}
 
 	/* Reset the device although not necessary at startup */
 	vtpci_reset(hw);
@@ -1072,7 +1071,7 @@ eth_virtio_dev_init(struct rte_eth_dev *eth_dev)
 		return -1;
 
 	/* If host does not support status then disable LSC */
-	if (eth_dev->dev_type == RTE_ETH_DEV_PCI) {
+	if (virtio_dev_check(eth_dev, RTE_ETH_DEV_PCI, NULL, 0)) {
 		if (!vtpci_with_feature(hw, VIRTIO_NET_F_STATUS))
 			pci_dev->driver->drv_flags &= ~RTE_PCI_DRV_INTR_LSC;
 
@@ -1154,13 +1153,14 @@ eth_virtio_dev_init(struct rte_eth_dev *eth_dev)
 
 	PMD_INIT_LOG(DEBUG, "hw->max_rx_queues=%d   hw->max_tx_queues=%d",
 			hw->max_rx_queues, hw->max_tx_queues);
-	if (eth_dev->dev_type == RTE_ETH_DEV_PCI) {
+	if (virtio_dev_check(eth_dev, RTE_ETH_DEV_PCI, NULL, 0)) {
 		PMD_INIT_LOG(DEBUG, "port %d vendorID=0x%x deviceID=0x%x",
 			     eth_dev->data->port_id, pci_dev->id.vendor_id,
 			     pci_dev->id.device_id);
 
 		/* Setup interrupt callback  */
-		if (pci_dev->driver->drv_flags & RTE_PCI_DRV_INTR_LSC)
+		if (virtio_dev_check(eth_dev, RTE_ETH_DEV_PCI,
+					NULL, RTE_PCI_DRV_INTR_LSC))
 			rte_intr_callback_register(&pci_dev->intr_handle,
 						   virtio_interrupt_handler,
 						   eth_dev);
@@ -1197,11 +1197,11 @@ eth_virtio_dev_uninit(struct rte_eth_dev *eth_dev)
 	eth_dev->data->mac_addrs = NULL;
 
 	/* reset interrupt callback  */
-	if (eth_dev->dev_type == RTE_ETH_DEV_PCI)
-		if (pci_dev->driver->drv_flags & RTE_PCI_DRV_INTR_LSC)
-			rte_intr_callback_unregister(&pci_dev->intr_handle,
-						     virtio_interrupt_handler,
-						     eth_dev);
+	if (virtio_dev_check(eth_dev, RTE_ETH_DEV_PCI, NULL, RTE_PCI_DRV_INTR_LSC))
+		rte_intr_callback_unregister(&pci_dev->intr_handle,
+					     virtio_interrupt_handler,
+					     eth_dev);
+
 	rte_eal_pci_unmap_device(pci_dev);
 
 	PMD_INIT_LOG(DEBUG, "dev_uninit completed");
@@ -1248,7 +1248,6 @@ virtio_dev_configure(struct rte_eth_dev *dev)
 {
 	const struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode;
 	struct virtio_hw *hw = dev->data->dev_private;
-	struct rte_pci_device *pci_dev = dev->pci_dev;
 
 	PMD_INIT_LOG(DEBUG, "configure");
 
@@ -1266,12 +1265,11 @@ virtio_dev_configure(struct rte_eth_dev *dev)
 		return -ENOTSUP;
 	}
 
-	if (dev->dev_type == RTE_ETH_DEV_PCI) {
-		if (pci_dev->driver->drv_flags & RTE_PCI_DRV_INTR_LSC)
-			if (vtpci_irq_config(hw, 0) == VIRTIO_MSI_NO_VECTOR) {
-				PMD_DRV_LOG(ERR, "failed to set config vector");
-				return -EBUSY;
-			}
+	if (virtio_dev_check(dev, RTE_ETH_DEV_PCI, NULL, RTE_PCI_DRV_INTR_LSC)) {
+		if (vtpci_irq_config(hw, 0) == VIRTIO_MSI_NO_VECTOR) {
+			PMD_DRV_LOG(ERR, "failed to set config vector");
+			return -EBUSY;
+		}
 	}
 
 	return 0;
@@ -1283,11 +1281,11 @@ virtio_dev_start(struct rte_eth_dev *dev)
 {
 	uint16_t nb_queues, i;
 	struct virtio_hw *hw = dev->data->dev_private;
-	struct rte_pci_device *pci_dev = dev->pci_dev;
 
 	/* check if lsc interrupt feature is enabled */
 	if (dev->data->dev_conf.intr_conf.lsc) {
-		if (!(pci_dev->driver->drv_flags & RTE_PCI_DRV_INTR_LSC)) {
+		if (!virtio_dev_check(dev, RTE_ETH_DEV_PCI,
+					NULL, RTE_PCI_DRV_INTR_LSC)) {
 			PMD_DRV_LOG(ERR, "link status not supported by host");
 			return -ENOTSUP;
 		}
diff --git a/drivers/net/virtio/virtio_ethdev.h b/drivers/net/virtio/virtio_ethdev.h
index fde77ca..223b56d 100644
--- a/drivers/net/virtio/virtio_ethdev.h
+++ b/drivers/net/virtio/virtio_ethdev.h
@@ -67,6 +67,38 @@
 	 1u << VIRTIO_NET_F_MRG_RXBUF	  |	\
 	 1ULL << VIRTIO_F_VERSION_1)
 
+static inline int
+virtio_dev_check(struct rte_eth_dev *dev, enum rte_eth_dev_type type,
+		const char *name, uint64_t flags)
+{
+	int ret;
+
+	if (dev == NULL)
+		return 0;
+
+	if (type != 0)
+		ret = (dev->dev_type == type);
+	else
+		ret = 1;
+
+	if (name != 0)
+		ret &= (strncmp(dev->data->name, name, strlen(name)) == 0);
+	else
+		ret &= 1;
+
+	if (flags != 0) {
+		if (dev->dev_type == RTE_ETH_DEV_PCI)
+			ret &= (dev->pci_dev->driver->drv_flags & flags) ? 1 : 0;
+		else if (dev->dev_type == RTE_ETH_DEV_VIRTUAL)
+			ret &= (dev->data->dev_flags & flags) ? 1 : 0;
+		else
+			ret = 0;
+	} else
+		ret &= 1;
+
+	return ret;
+}
+
 /*
  * CQ function prototype
  */
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 120+ messages in thread

* [PATCH v4 03/12] EAL: Add a new "--range-virtaddr" option
  2016-02-22  8:17       ` [PATCH v3 3/6] EAL: Add new EAL "--range-virtaddr" option Tetsuya Mukawa
                           ` (3 preceding siblings ...)
  2016-03-09  8:33         ` [PATCH v4 02/12] vhost: Add a function to check virtio device type Tetsuya Mukawa
@ 2016-03-09  8:33         ` Tetsuya Mukawa
  2016-03-09  8:33         ` [PATCH v4 04/12] EAL: Add a new "--align-memsize" option Tetsuya Mukawa
                           ` (8 subsequent siblings)
  13 siblings, 0 replies; 120+ messages in thread
From: Tetsuya Mukawa @ 2016-03-09  8:33 UTC (permalink / raw)
  To: dev, jianfeng.tan, huawei.xie, yuanhan.liu

The option specifies how to mmap EAL memory.
If the option is specified like '--range-virtaddr=<addr1>-<addr2>',
EAL will check /proc/maps, then tries to find free region between addr1
and addr2. If a region is found, EAL will treat it as if 'base-virtaddr'
is specified. Because of this, the option will not work with
'--base-virtaddr'.

Signed-off-by: Tetsuya Mukawa <mukawa@igel.co.jp>
---
 lib/librte_eal/common/eal_common_options.c |  9 ++++
 lib/librte_eal/common/eal_internal_cfg.h   |  2 +
 lib/librte_eal/common/eal_options.h        |  2 +
 lib/librte_eal/linuxapp/eal/eal.c          | 39 ++++++++++++++
 lib/librte_eal/linuxapp/eal/eal_memory.c   | 82 +++++++++++++++++++++++++++++-
 5 files changed, 133 insertions(+), 1 deletion(-)

diff --git a/lib/librte_eal/common/eal_common_options.c b/lib/librte_eal/common/eal_common_options.c
index 65bccbd..3b4f789 100644
--- a/lib/librte_eal/common/eal_common_options.c
+++ b/lib/librte_eal/common/eal_common_options.c
@@ -74,6 +74,7 @@ eal_short_options[] =
 const struct option
 eal_long_options[] = {
 	{OPT_BASE_VIRTADDR,     1, NULL, OPT_BASE_VIRTADDR_NUM    },
+	{OPT_RANGE_VIRTADDR,    1, NULL, OPT_RANGE_VIRTADDR_NUM   },
 	{OPT_CREATE_UIO_DEV,    0, NULL, OPT_CREATE_UIO_DEV_NUM   },
 	{OPT_FILE_PREFIX,       1, NULL, OPT_FILE_PREFIX_NUM      },
 	{OPT_HELP,              0, NULL, OPT_HELP_NUM             },
@@ -137,6 +138,8 @@ eal_reset_internal_config(struct internal_config *internal_cfg)
 	for (i = 0; i < MAX_HUGEPAGE_SIZES; i++)
 		internal_cfg->hugepage_info[i].lock_descriptor = -1;
 	internal_cfg->base_virtaddr = 0;
+	internal_cfg->range_virtaddr_start = 0;
+	internal_cfg->range_virtaddr_end = 0;
 
 	internal_cfg->syslog_facility = LOG_DAEMON;
 	/* default value from build option */
@@ -985,6 +988,12 @@ eal_check_common_options(struct internal_config *internal_cfg)
 		return -1;
 	}
 
+	if (internal_cfg->base_virtaddr && internal_cfg->range_virtaddr_end) {
+		RTE_LOG(ERR, EAL, "Option --"OPT_RANGE_VIRTADDR" cannot "
+			"be specified together with --"OPT_BASE_VIRTADDR"\n");
+		return -1;
+	}
+
 	return 0;
 }
 
diff --git a/lib/librte_eal/common/eal_internal_cfg.h b/lib/librte_eal/common/eal_internal_cfg.h
index 9117ed9..0734630 100644
--- a/lib/librte_eal/common/eal_internal_cfg.h
+++ b/lib/librte_eal/common/eal_internal_cfg.h
@@ -78,6 +78,8 @@ struct internal_config {
 	volatile unsigned force_sockets;
 	volatile uint64_t socket_mem[RTE_MAX_NUMA_NODES]; /**< amount of memory per socket */
 	uintptr_t base_virtaddr;          /**< base address to try and reserve memory from */
+	uintptr_t range_virtaddr_start;   /**< start address of mappable region */
+	uintptr_t range_virtaddr_end;     /**< end address of mappable region */
 	volatile int syslog_facility;	  /**< facility passed to openlog() */
 	volatile uint32_t log_level;	  /**< default log level */
 	/** default interrupt mode for VFIO */
diff --git a/lib/librte_eal/common/eal_options.h b/lib/librte_eal/common/eal_options.h
index e5da14a..8e4cf1d 100644
--- a/lib/librte_eal/common/eal_options.h
+++ b/lib/librte_eal/common/eal_options.h
@@ -47,6 +47,8 @@ enum {
 	OPT_LONG_MIN_NUM = 256,
 #define OPT_BASE_VIRTADDR     "base-virtaddr"
 	OPT_BASE_VIRTADDR_NUM,
+#define OPT_RANGE_VIRTADDR    "range-virtaddr"
+	OPT_RANGE_VIRTADDR_NUM,
 #define OPT_CREATE_UIO_DEV    "create-uio-dev"
 	OPT_CREATE_UIO_DEV_NUM,
 #define OPT_FILE_PREFIX       "file-prefix"
diff --git a/lib/librte_eal/linuxapp/eal/eal.c b/lib/librte_eal/linuxapp/eal/eal.c
index 6bae02c..62b7a57 100644
--- a/lib/librte_eal/linuxapp/eal/eal.c
+++ b/lib/librte_eal/linuxapp/eal/eal.c
@@ -444,6 +444,35 @@ eal_parse_base_virtaddr(const char *arg)
 }
 
 static int
+eal_parse_range_virtaddr(const char *range)
+{
+	char *p, *endptr;
+	uint64_t tmp_start, tmp_end;
+
+	p = strchr(range, '-');
+	if (p == NULL)
+		return -1;
+	*p++ = '\0';
+
+	errno = 0;
+	tmp_start = strtoul(range, &endptr, 0);
+	if ((errno != 0) || endptr == NULL || (*endptr != '\0'))
+		return -1;
+
+	tmp_end = strtoul(p, &endptr, 0);
+	if ((errno != 0) || endptr == NULL || (*endptr != '\0'))
+		return -1;
+
+	if (tmp_start >= tmp_end)
+		return -1;
+
+	internal_config.range_virtaddr_start = tmp_start;
+	internal_config.range_virtaddr_end = tmp_end;
+
+	return 0;
+}
+
+static int
 eal_parse_vfio_intr(const char *mode)
 {
 	unsigned i;
@@ -604,6 +633,16 @@ eal_parse_args(int argc, char **argv)
 			}
 			break;
 
+		case OPT_RANGE_VIRTADDR_NUM:
+			if (eal_parse_range_virtaddr(optarg) < 0) {
+				RTE_LOG(ERR, EAL, "invalid parameter for --"
+						OPT_RANGE_VIRTADDR "\n");
+				eal_usage(prgname);
+				ret = -1;
+				goto out;
+			}
+			break;
+
 		case OPT_VFIO_INTR_NUM:
 			if (eal_parse_vfio_intr(optarg) < 0) {
 				RTE_LOG(ERR, EAL, "invalid parameters for --"
diff --git a/lib/librte_eal/linuxapp/eal/eal_memory.c b/lib/librte_eal/linuxapp/eal/eal_memory.c
index bc04b3d..e15bf4c 100644
--- a/lib/librte_eal/linuxapp/eal/eal_memory.c
+++ b/lib/librte_eal/linuxapp/eal/eal_memory.c
@@ -251,6 +251,66 @@ aslr_enabled(void)
 }
 
 /*
+ * Find memory space that fits user request.
+ */
+static uintptr_t
+rte_eal_get_free_region(uint64_t pagesz)
+{
+	uint64_t alloc_size, start, end, next_start;
+	uint64_t low_limit, high_limit;
+	uintptr_t addr = 0;
+	char buf[1024], *p;
+	FILE *fp;
+
+	alloc_size = internal_config.memory;
+	low_limit = internal_config.range_virtaddr_start;
+	high_limit = internal_config.range_virtaddr_end;
+
+	/* allocation size should be aligned by page size */
+	if (alloc_size != RTE_ALIGN_CEIL(alloc_size, pagesz)) {
+		rte_panic("Invalid allocation size 0x%lx\n", alloc_size);
+		return 0;
+	}
+
+	fp = fopen("/proc/self/maps", "r");
+	if (fp == NULL) {
+		rte_panic("Cannot open /proc/self/maps\n");
+		return 0;
+	}
+
+	next_start = 0;
+	do {
+		start = next_start;
+
+		if ((p = fgets(buf, sizeof(buf), fp)) != NULL) {
+			if (sscanf(p, "%lx-%lx ", &end, &next_start) < 2)
+				break;
+
+			next_start = RTE_ALIGN_CEIL(next_start, alloc_size);
+			end = RTE_ALIGN_CEIL(end, alloc_size) - 1;
+		} else
+			end = UINT64_MAX;
+
+		if (start >= high_limit)
+			break;
+		if (end < low_limit)
+			continue;
+
+		start = RTE_MAX(start, low_limit);
+		end = RTE_MIN(end, high_limit - 1);
+
+		if (end - start >= alloc_size - 1) {
+			addr = start;
+			break;
+		}
+	} while (end != UINT64_MAX);
+
+	fclose(fp);
+
+	return addr;
+}
+
+/*
  * Try to mmap *size bytes in /dev/zero. If it is successful, return the
  * pointer to the mmap'd area and keep *size unmodified. Else, retry
  * with a smaller zone: decrease *size by hugepage_sz until it reaches
@@ -1095,6 +1155,25 @@ rte_eal_hugepage_init(void)
 	/* get pointer to global configuration */
 	mcfg = rte_eal_get_configuration()->mem_config;
 
+	if (internal_config.range_virtaddr_end) {
+		uint64_t pagesize = RTE_PGSIZE_4K;
+		struct hugepage_info *hpi;
+		unsigned n;
+		uintptr_t addr;
+
+		/* determine maximum hugepage size */
+		for (n = 0; n < internal_config.num_hugepage_sizes; n++) {
+			hpi = &internal_config.hugepage_info[n];
+			pagesize = RTE_MAX(hpi->hugepage_sz, pagesize);
+		}
+
+		addr = rte_eal_get_free_region(pagesize);
+		if (addr == 0)
+			RTE_LOG(WARNING, EAL,
+				"no free space to mmap in specified region\n");
+		internal_config.base_virtaddr = addr;
+	}
+
 	/* when hugetlbfs is disabled or single-file option is specified */
 	if (internal_config.no_hugetlbfs || internal_config.single_file) {
 		int fd;
@@ -1127,7 +1206,8 @@ rte_eal_hugepage_init(void)
 			return -1;
 		}
 
-		addr = mmap(NULL, internal_config.memory,
+		addr = mmap((void *)internal_config.base_virtaddr,
+			    internal_config.memory,
 			    PROT_READ | PROT_WRITE,
 			    MAP_SHARED | MAP_POPULATE, fd, 0);
 		if (addr == MAP_FAILED) {
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 120+ messages in thread

* [PATCH v4 04/12] EAL: Add a new "--align-memsize" option
  2016-02-22  8:17       ` [PATCH v3 3/6] EAL: Add new EAL "--range-virtaddr" option Tetsuya Mukawa
                           ` (4 preceding siblings ...)
  2016-03-09  8:33         ` [PATCH v4 03/12] EAL: Add a new "--range-virtaddr" option Tetsuya Mukawa
@ 2016-03-09  8:33         ` Tetsuya Mukawa
  2016-03-09  8:33         ` [PATCH v4 05/12] virtio, qtest: Add QTest utility basic functions Tetsuya Mukawa
                           ` (7 subsequent siblings)
  13 siblings, 0 replies; 120+ messages in thread
From: Tetsuya Mukawa @ 2016-03-09  8:33 UTC (permalink / raw)
  To: dev, jianfeng.tan, huawei.xie, yuanhan.liu

The option will work with "--range-virtaddr", and if the option is
specified, mapped address will be align by EAL memory size.
Such an alignment is required for using virtio-net PMD extension
on container that uses QEMU QTest framework.

Signed-off-by: Tetsuya Mukawa <mukawa@igel.co.jp>
---
 lib/librte_eal/common/eal_common_options.c | 8 ++++++++
 lib/librte_eal/common/eal_internal_cfg.h   | 1 +
 lib/librte_eal/common/eal_options.h        | 2 ++
 lib/librte_eal/linuxapp/eal/eal.c          | 4 ++++
 lib/librte_eal/linuxapp/eal/eal_memory.c   | 9 +++++++++
 5 files changed, 24 insertions(+)

diff --git a/lib/librte_eal/common/eal_common_options.c b/lib/librte_eal/common/eal_common_options.c
index 3b4f789..853420a 100644
--- a/lib/librte_eal/common/eal_common_options.c
+++ b/lib/librte_eal/common/eal_common_options.c
@@ -75,6 +75,7 @@ const struct option
 eal_long_options[] = {
 	{OPT_BASE_VIRTADDR,     1, NULL, OPT_BASE_VIRTADDR_NUM    },
 	{OPT_RANGE_VIRTADDR,    1, NULL, OPT_RANGE_VIRTADDR_NUM   },
+	{OPT_ALIGN_MEMSIZE,     0, NULL, OPT_ALIGN_MEMSIZE_NUM    },
 	{OPT_CREATE_UIO_DEV,    0, NULL, OPT_CREATE_UIO_DEV_NUM   },
 	{OPT_FILE_PREFIX,       1, NULL, OPT_FILE_PREFIX_NUM      },
 	{OPT_HELP,              0, NULL, OPT_HELP_NUM             },
@@ -140,6 +141,7 @@ eal_reset_internal_config(struct internal_config *internal_cfg)
 	internal_cfg->base_virtaddr = 0;
 	internal_cfg->range_virtaddr_start = 0;
 	internal_cfg->range_virtaddr_end = 0;
+	internal_cfg->align_memsize = 0;
 
 	internal_cfg->syslog_facility = LOG_DAEMON;
 	/* default value from build option */
@@ -994,6 +996,12 @@ eal_check_common_options(struct internal_config *internal_cfg)
 		return -1;
 	}
 
+	if (internal_cfg->range_virtaddr_end == 0 && internal_cfg->align_memsize) {
+		RTE_LOG(ERR, EAL, "Option --"OPT_RANGE_VIRTADDR" should be "
+			"specified together with --"OPT_ALIGN_MEMSIZE"\n");
+		return -1;
+	}
+
 	return 0;
 }
 
diff --git a/lib/librte_eal/common/eal_internal_cfg.h b/lib/librte_eal/common/eal_internal_cfg.h
index 0734630..df33a9f 100644
--- a/lib/librte_eal/common/eal_internal_cfg.h
+++ b/lib/librte_eal/common/eal_internal_cfg.h
@@ -80,6 +80,7 @@ struct internal_config {
 	uintptr_t base_virtaddr;          /**< base address to try and reserve memory from */
 	uintptr_t range_virtaddr_start;   /**< start address of mappable region */
 	uintptr_t range_virtaddr_end;     /**< end address of mappable region */
+	volatile unsigned align_memsize;  /**< true to align virtaddr by memory size */
 	volatile int syslog_facility;	  /**< facility passed to openlog() */
 	volatile uint32_t log_level;	  /**< default log level */
 	/** default interrupt mode for VFIO */
diff --git a/lib/librte_eal/common/eal_options.h b/lib/librte_eal/common/eal_options.h
index 8e4cf1d..9e36f68 100644
--- a/lib/librte_eal/common/eal_options.h
+++ b/lib/librte_eal/common/eal_options.h
@@ -49,6 +49,8 @@ enum {
 	OPT_BASE_VIRTADDR_NUM,
 #define OPT_RANGE_VIRTADDR    "range-virtaddr"
 	OPT_RANGE_VIRTADDR_NUM,
+#define OPT_ALIGN_MEMSIZE     "align-memsize"
+	OPT_ALIGN_MEMSIZE_NUM,
 #define OPT_CREATE_UIO_DEV    "create-uio-dev"
 	OPT_CREATE_UIO_DEV_NUM,
 #define OPT_FILE_PREFIX       "file-prefix"
diff --git a/lib/librte_eal/linuxapp/eal/eal.c b/lib/librte_eal/linuxapp/eal/eal.c
index 62b7a57..e2a0096 100644
--- a/lib/librte_eal/linuxapp/eal/eal.c
+++ b/lib/librte_eal/linuxapp/eal/eal.c
@@ -643,6 +643,10 @@ eal_parse_args(int argc, char **argv)
 			}
 			break;
 
+		case OPT_ALIGN_MEMSIZE_NUM:
+			internal_config.align_memsize = 1;
+			break;
+
 		case OPT_VFIO_INTR_NUM:
 			if (eal_parse_vfio_intr(optarg) < 0) {
 				RTE_LOG(ERR, EAL, "invalid parameters for --"
diff --git a/lib/librte_eal/linuxapp/eal/eal_memory.c b/lib/librte_eal/linuxapp/eal/eal_memory.c
index e15bf4c..1c9eb3c 100644
--- a/lib/librte_eal/linuxapp/eal/eal_memory.c
+++ b/lib/librte_eal/linuxapp/eal/eal_memory.c
@@ -272,6 +272,15 @@ rte_eal_get_free_region(uint64_t pagesz)
 		return 0;
 	}
 
+	if (internal_config.align_memsize) {
+		/*
+		 * Typically, BAR register of PCI device requiers such
+		 * an alignment.
+		 */
+		low_limit = RTE_ALIGN_CEIL(low_limit, alloc_size);
+		high_limit = RTE_ALIGN_FLOOR(high_limit, alloc_size);
+	}
+
 	fp = fopen("/proc/self/maps", "r");
 	if (fp == NULL) {
 		rte_panic("Cannot open /proc/self/maps\n");
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 120+ messages in thread

* [PATCH v4 05/12] virtio, qtest: Add QTest utility basic functions
  2016-02-22  8:17       ` [PATCH v3 3/6] EAL: Add new EAL "--range-virtaddr" option Tetsuya Mukawa
                           ` (5 preceding siblings ...)
  2016-03-09  8:33         ` [PATCH v4 04/12] EAL: Add a new "--align-memsize" option Tetsuya Mukawa
@ 2016-03-09  8:33         ` Tetsuya Mukawa
  2016-03-09  8:33         ` [PATCH v4 06/12] virtio, qtest: Add pci device initialization function to qtest utils Tetsuya Mukawa
                           ` (6 subsequent siblings)
  13 siblings, 0 replies; 120+ messages in thread
From: Tetsuya Mukawa @ 2016-03-09  8:33 UTC (permalink / raw)
  To: dev, jianfeng.tan, huawei.xie, yuanhan.liu

The patch adds basic functions for accessing to QEMU quest that runs in
QTest mode. The functions will be used by virtio container extension
that can access to the above guest.

Signed-off-by: Tetsuya Mukawa <mukawa@igel.co.jp>
---
 config/common_base               |   1 +
 drivers/net/virtio/Makefile      |   4 +
 drivers/net/virtio/qtest_utils.c | 480 +++++++++++++++++++++++++++++++++++++++
 drivers/net/virtio/qtest_utils.h | 119 ++++++++++
 4 files changed, 604 insertions(+)
 create mode 100644 drivers/net/virtio/qtest_utils.c
 create mode 100644 drivers/net/virtio/qtest_utils.h

diff --git a/config/common_base b/config/common_base
index 340feaf..b19cb59 100644
--- a/config/common_base
+++ b/config/common_base
@@ -260,6 +260,7 @@ CONFIG_RTE_LIBRTE_VIRTIO_DEBUG_DUMP=n
 # Enable virtio support for container
 #
 CONFIG_RTE_VIRTIO_VDEV=n
+CONFIG_RTE_VIRTIO_VDEV_QTEST=n
 
 #
 # Compile burst-oriented VMXNET3 PMD driver
diff --git a/drivers/net/virtio/Makefile b/drivers/net/virtio/Makefile
index 9e83852..e6d5a04 100644
--- a/drivers/net/virtio/Makefile
+++ b/drivers/net/virtio/Makefile
@@ -59,6 +59,10 @@ ifeq ($(CONFIG_RTE_VIRTIO_VDEV),y)
 	SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += vhost_embedded.c
 endif
 
+ifeq ($(CONFIG_RTE_VIRTIO_VDEV_QTEST),y)
+	SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += qtest_utils.c
+endif
+
 # this lib depends upon:
 DEPDIRS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += lib/librte_eal lib/librte_ether
 DEPDIRS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += lib/librte_mempool lib/librte_mbuf
diff --git a/drivers/net/virtio/qtest_utils.c b/drivers/net/virtio/qtest_utils.c
new file mode 100644
index 0000000..f4cd6af
--- /dev/null
+++ b/drivers/net/virtio/qtest_utils.c
@@ -0,0 +1,480 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2016 IGEL Co., Ltd. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of IGEL Co., Ltd. nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <unistd.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <pthread.h>
+#include <fcntl.h>
+
+#include <rte_malloc.h>
+
+#include "virtio_logs.h"
+#include "virtio_ethdev.h"
+#include "qtest_utils.h"
+
+union qtest_pipefds {
+	struct {
+		int pipefd[2];
+	};
+	struct {
+		int readfd;
+		int writefd;
+	};
+};
+
+struct qtest_session {
+	int qtest_socket;
+	pthread_mutex_t qtest_session_lock;
+
+	pthread_t event_th;
+	int event_th_started;
+	char *evq;
+	char *evq_dequeue_ptr;
+	size_t evq_total_len;
+
+	union qtest_pipefds msgfds;
+};
+
+static int
+qtest_raw_send(int fd, char *buf, size_t count)
+{
+	size_t len = count;
+	size_t total_len = 0;
+	int ret = 0;
+
+	while (len > 0) {
+		ret = write(fd, buf, len);
+		if (ret == -1) {
+			if (errno == EINTR)
+				continue;
+			return ret;
+		}
+		if (ret == (int)len)
+			break;
+		total_len += ret;
+		buf += ret;
+		len -= ret;
+	}
+	return total_len + ret;
+}
+
+static int
+qtest_raw_recv(int fd, char *buf, size_t count)
+{
+	size_t len = count;
+	size_t total_len = 0;
+	int ret = 0;
+
+	while (len > 0) {
+		ret = read(fd, buf, len);
+		if (ret <= 0) {
+			if (errno == EINTR) {
+				continue;
+			}
+			return ret;
+		}
+		if (ret == (int)len)
+			break;
+		if (*(buf + ret - 1) == '\n')
+			break;
+		total_len += ret;
+		buf += ret;
+		len -= ret;
+	}
+	return total_len + ret;
+}
+
+/*
+ * To know QTest protocol specification, see below QEMU source code.
+ *  - qemu/qtest.c
+ * If qtest socket is closed, qtest_raw_in and qtest_raw_read will return 0.
+ */
+static uint32_t
+qtest_raw_in(struct qtest_session *s, uint16_t addr, char type)
+{
+	char buf[64];
+	int ret;
+
+	if ((type != 'l') && (type != 'w') && (type != 'b'))
+		rte_panic("Invalid value\n");
+
+	snprintf(buf, sizeof(buf), "in%c 0x%x\n", type, addr);
+	/* write to qtest socket */
+	ret = qtest_raw_send(s->qtest_socket, buf, strlen(buf));
+	/* read reply from event handler */
+	ret = qtest_raw_recv(s->msgfds.readfd, buf, sizeof(buf));
+	if (ret < 0)
+		return 0;
+
+	buf[ret] = '\0';
+	return strtoul(buf + strlen("OK "), NULL, 16);
+}
+
+static void
+qtest_raw_out(struct qtest_session *s, uint16_t addr, uint32_t val, char type)
+{
+	char buf[64];
+	int ret __rte_unused;
+
+	if ((type != 'l') && (type != 'w') && (type != 'b'))
+		rte_panic("Invalid value\n");
+
+	snprintf(buf, sizeof(buf), "out%c 0x%x 0x%x\n", type, addr, val);
+	/* write to qtest socket */
+	ret = qtest_raw_send(s->qtest_socket, buf, strlen(buf));
+	/* read reply from event handler */
+	ret = qtest_raw_recv(s->msgfds.readfd, buf, sizeof(buf));
+}
+
+static uint32_t
+qtest_raw_read(struct qtest_session *s, uint64_t addr, char type)
+{
+	char buf[64];
+	int ret;
+
+	if ((type != 'l') && (type != 'w') && (type != 'b'))
+		rte_panic("Invalid value\n");
+
+	snprintf(buf, sizeof(buf), "read%c 0x%lx\n", type, addr);
+	/* write to qtest socket */
+	ret = qtest_raw_send(s->qtest_socket, buf, strlen(buf));
+	/* read reply from event handler */
+	ret = qtest_raw_recv(s->msgfds.readfd, buf, sizeof(buf));
+	if (ret < 0)
+		return 0;
+
+	buf[ret] = '\0';
+	return strtoul(buf + strlen("OK "), NULL, 16);
+}
+
+static void
+qtest_raw_write(struct qtest_session *s, uint64_t addr, uint32_t val, char type)
+{
+	char buf[64];
+	int ret __rte_unused;
+
+	if ((type != 'l') && (type != 'w') && (type != 'b'))
+		rte_panic("Invalid value\n");
+
+	snprintf(buf, sizeof(buf), "write%c 0x%lx 0x%x\n", type, addr, val);
+	/* write to qtest socket */
+	ret = qtest_raw_send(s->qtest_socket, buf, strlen(buf));
+	/* read reply from event handler */
+	ret = qtest_raw_recv(s->msgfds.readfd, buf, sizeof(buf));
+}
+
+/*
+ * qtest_in/out are used for accessing ioport of qemu guest.
+ * qtest_read/write are used for accessing memory of qemu guest.
+ */
+uint32_t
+qtest_in(struct qtest_session *s, uint16_t addr, char type)
+{
+	uint32_t val;
+
+	if (pthread_mutex_lock(&s->qtest_session_lock) < 0)
+		rte_panic("Cannot lock mutex\n");
+
+	val = qtest_raw_in(s, addr, type);
+
+	if (pthread_mutex_unlock(&s->qtest_session_lock) < 0)
+		rte_panic("Cannot lock mutex\n");
+
+	return val;
+}
+
+void
+qtest_out(struct qtest_session *s, uint16_t addr, uint64_t val, char type)
+{
+	if (pthread_mutex_lock(&s->qtest_session_lock) < 0)
+		rte_panic("Cannot lock mutex\n");
+
+	qtest_raw_out(s, addr, val, type);
+
+	if (pthread_mutex_unlock(&s->qtest_session_lock) < 0)
+		rte_panic("Cannot lock mutex\n");
+}
+
+uint32_t
+qtest_read(struct qtest_session *s, uint64_t addr, char type)
+{
+	uint32_t val;
+
+	if (pthread_mutex_lock(&s->qtest_session_lock) < 0)
+		rte_panic("Cannot lock mutex\n");
+
+	val = qtest_raw_read(s, addr, type);
+
+	if (pthread_mutex_unlock(&s->qtest_session_lock) < 0)
+		rte_panic("Cannot lock mutex\n");
+
+	return val;
+}
+
+void
+qtest_write(struct qtest_session *s, uint64_t addr, uint64_t val, char type)
+{
+	if (pthread_mutex_lock(&s->qtest_session_lock) < 0)
+		rte_panic("Cannot lock mutex\n");
+
+	qtest_raw_write(s, addr, val, type);
+
+	if (pthread_mutex_unlock(&s->qtest_session_lock) < 0)
+		rte_panic("Cannot lock mutex\n");
+}
+
+static void
+qtest_event_send(struct qtest_session *s, char *buf)
+{
+	int ret;
+
+	/* relay normal message to pipe */
+	ret = qtest_raw_send(s->msgfds.writefd, buf, strlen(buf));
+	if (ret < 0)
+		rte_panic("cannot relay normal message\n");
+}
+
+static void
+qtest_close_one_socket(int *fd)
+{
+	if (*fd > 0) {
+		close(*fd);
+		*fd = -1;
+	}
+}
+
+static void
+qtest_close_sockets(struct qtest_session *s)
+{
+	qtest_close_one_socket(&s->qtest_socket);
+	qtest_close_one_socket(&s->msgfds.readfd);
+	qtest_close_one_socket(&s->msgfds.writefd);
+}
+
+static void
+qtest_event_enqueue(struct qtest_session *s, char *buf)
+{
+	size_t len = strlen(buf);
+	char *dest;
+
+	if (s->evq == NULL) {
+		/* allocate one more byte for '\0' */
+		s->evq = malloc(len + 1);
+		if (s->evq == NULL)
+			rte_panic("Cannot allocate memory\n");
+
+		s->evq_dequeue_ptr = s->evq;
+		s->evq_total_len = len + 1;
+		dest = s->evq;
+	} else {
+		size_t offset = s->evq_dequeue_ptr - s->evq;
+
+		s->evq = realloc(s->evq, s->evq_total_len + len);
+		if (s->evq == NULL)
+			rte_panic("Cannot re-allocate memory\n");
+
+		s->evq_dequeue_ptr = s->evq + offset;
+		dest = s->evq + s->evq_total_len - 1;
+		s->evq_total_len += len;
+	}
+
+	strncpy(dest, buf, len);
+	dest[len] = '\0';
+}
+
+static char *
+qtest_event_dequeue(struct qtest_session *s)
+{
+	char *head, *next_head;
+
+	head = s->evq_dequeue_ptr;
+
+	/* make sure message is terminated by '\n' */
+	next_head = strchr(s->evq_dequeue_ptr, '\n');
+	if (next_head == NULL)
+		return NULL;
+
+	/* set next dequeue pointer */
+	s->evq_dequeue_ptr = next_head + 1;
+
+	return head;
+}
+
+static void
+qtest_event_flush(struct qtest_session *s)
+{
+	if (s->evq) {
+		free(s->evq);
+		s->evq = NULL;
+		s->evq_dequeue_ptr = NULL;
+		s->evq_total_len = 0;
+	}
+}
+
+/*
+ * This thread relays QTest response using pipe and eventfd.
+ * The function is needed because we need to separate IRQ message from others.
+ */
+static void *
+qtest_event_handler(void *data) {
+	struct qtest_session *s = (struct qtest_session *)data;
+	char buf[64];
+	char *p;
+	int ret;
+
+	for (;;) {
+		memset(buf, 0, sizeof(buf));
+		ret = qtest_raw_recv(s->qtest_socket, buf, sizeof(buf));
+		if (ret <= 0) {
+			PMD_DRV_LOG(EMERG,
+				"QTest connection was closed.\n"
+				"Please detach the port, then start QEMU "
+				"and attach the port again.\n");
+			qtest_close_sockets(s);
+			qtest_event_flush(s);
+			return NULL;
+		}
+
+		qtest_event_enqueue(s, buf);
+
+		/* in the case of incomplete message, receive again */
+		p = &buf[sizeof(buf) - 1];
+		if ((*p != '\0') && (*p != '\n'))
+			continue;
+
+		/* may receive multiple messages at the same time */
+		while ((p = qtest_event_dequeue(s)) != NULL)
+			qtest_event_send(s, p);
+
+		qtest_event_flush(s);
+	}
+	return NULL;
+}
+
+static int
+qtest_open_socket(char *path)
+{
+	struct sockaddr_un sa = {0};
+	int ret, fd, loop = 100;
+
+	fd = socket(AF_UNIX, SOCK_STREAM, 0);
+	if (fd < 0)
+		return -1;
+
+	sa.sun_family = AF_UNIX;
+	strncpy(sa.sun_path, path, sizeof(sa.sun_path));
+
+	while (loop--) {
+		/*
+		 * If QEMU has multiple sockets needed to be listen, QEMU needs
+		 * some time to start listening a next socket.
+		 * In our case, after connecting ivshmem socket, we may need to wait
+		 * a bit to connect to qtest socket.
+		 */
+		ret = connect(fd, (struct sockaddr *)&sa,
+				sizeof(struct sockaddr_un));
+		if (ret == 0)
+			break;
+		else
+			usleep(100000);
+	}
+
+	if (ret != 0) {
+		close(fd);
+		return -1;
+	}
+
+	return fd;
+}
+
+void
+qtest_vdev_uninit(struct qtest_session *s)
+{
+	qtest_close_sockets(s);
+	qtest_event_flush(s);
+
+	if (s->event_th_started) {
+		pthread_cancel(s->event_th);
+		pthread_join(s->event_th, NULL);
+		s->event_th_started = 0;
+	}
+
+	pthread_mutex_destroy(&s->qtest_session_lock);
+	rte_free(s);
+}
+
+struct qtest_session *
+qtest_vdev_init(char *qtest_path)
+{
+	struct qtest_session *s;
+	int ret;
+
+	s = rte_zmalloc(NULL, sizeof(*s), RTE_CACHE_LINE_SIZE);
+	if (s == NULL) {
+		PMD_DRV_LOG(ERR, "Failed to allocate memory\n");
+		return NULL;
+	}
+
+	ret = pthread_mutex_init(&s->qtest_session_lock, NULL);
+	if (ret != 0) {
+		PMD_DRV_LOG(ERR, "Failed to initialize mutex\n");
+		rte_free(s);
+		return NULL;
+	}
+
+	ret = pipe(s->msgfds.pipefd);
+	if (ret != 0) {
+		PMD_DRV_LOG(ERR, "Failed to initialize message pipe\n");
+		goto error;
+	}
+
+	s->qtest_socket = qtest_open_socket(qtest_path);
+	if (s->qtest_socket < 0) {
+		PMD_DRV_LOG(ERR, "Failed to open %s\n", qtest_path);
+		goto error;
+	}
+
+	ret = pthread_create(&s->event_th, NULL, qtest_event_handler, s);
+	if (ret != 0) {
+		PMD_DRV_LOG(ERR, "Failed to create event handler\n");
+		goto error;
+	}
+	s->event_th_started = 1;
+
+	return s;
+
+error:
+	qtest_vdev_uninit(s);
+	return NULL;
+}
diff --git a/drivers/net/virtio/qtest_utils.h b/drivers/net/virtio/qtest_utils.h
new file mode 100644
index 0000000..962fc5c
--- /dev/null
+++ b/drivers/net/virtio/qtest_utils.h
@@ -0,0 +1,119 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2016 IGEL Co., Ltd. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of IGEL Co., Ltd. nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _VIRTIO_QTEST_UTILS_H_
+#define _VIRTIO_QTEST_UTILS_H_
+
+/**
+ * @internal
+ * Initialization function of QTest utility.
+ *
+ * @param qtest_path
+ *   Path of qtest socket.
+ * @return
+ *   The pointer to qtest session structure.
+ */
+struct qtest_session *qtest_vdev_init(char *qtest_path);
+
+/**
+ * @internal
+ * Finalization function of QTest utility.
+ *
+ * @param s
+ *   The pointer to qtest session structure.
+ */
+void qtest_vdev_uninit(struct qtest_session *s);
+
+/**
+ * @internal
+ * Read a port of QEMU guest.
+ *
+ * @param s
+ *   The pointer to qtest session structure.
+ * @param addr
+ *   The port address.
+ * @param type
+ *   Size of port. Specify one of 'l', 'w', and 'b'.
+ * @return
+ *   Value read from the port.
+ */
+uint32_t qtest_in(struct qtest_session *s, uint16_t addr, char type);
+
+/**
+ * @internal
+ * Write a port of QEMU guest.
+ *
+ * @param s
+ *   The pointer to qtest session structure.
+ * @param addr
+ *   The port address.
+ * @param val
+ *   Written value.
+ * @param type
+ *   Size of port. Specify one of 'l', 'w', and 'b'.
+ */
+void qtest_out(struct qtest_session *s, uint16_t addr,
+			uint64_t val, char type);
+
+/**
+ * @internal
+ * Read memory of QEMU guest.
+ *
+ * @param s
+ *   The pointer to qtest session structure.
+ * @param addr
+ *   The memory address.
+ * @param type
+ *   Size of port. Specify one of 'l', 'w', and 'b'.
+ * @return
+ *   Value read from the memory.
+ */
+uint32_t qtest_read(struct qtest_session *s, uint64_t addr, char type);
+
+/**
+ * @internal
+ * Write memory of QEMU guest.
+ *
+ * @param s
+ *   The pointer to qtest session structure.
+ * @param addr
+ *   The memory address.
+ * @param val
+ *   Written value.
+ * @param type
+ *   Size of memory. Specify one of 'l', 'w', and 'b'.
+ */
+void qtest_write(struct qtest_session *s, uint64_t addr,
+			uint64_t val, char type);
+
+#endif /* _VIRTIO_QTEST_UTILS_H_ */
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 120+ messages in thread

* [PATCH v4 06/12] virtio, qtest: Add pci device initialization function to qtest utils
  2016-02-22  8:17       ` [PATCH v3 3/6] EAL: Add new EAL "--range-virtaddr" option Tetsuya Mukawa
                           ` (6 preceding siblings ...)
  2016-03-09  8:33         ` [PATCH v4 05/12] virtio, qtest: Add QTest utility basic functions Tetsuya Mukawa
@ 2016-03-09  8:33         ` Tetsuya Mukawa
  2016-03-09  8:33         ` [PATCH v4 07/12] virtio, qtest: Add functionality to share memory between QTest guest Tetsuya Mukawa
                           ` (5 subsequent siblings)
  13 siblings, 0 replies; 120+ messages in thread
From: Tetsuya Mukawa @ 2016-03-09  8:33 UTC (permalink / raw)
  To: dev, jianfeng.tan, huawei.xie, yuanhan.liu

The patch adds general pci device initialization functionality to
qtest utils. It initializes pci devices using qtest messaging.

Signed-off-by: Tetsuya Mukawa <mukawa@igel.co.jp>
---
 drivers/net/virtio/qtest_utils.c | 349 ++++++++++++++++++++++++++++++++++++++-
 drivers/net/virtio/qtest_utils.h | 114 ++++++++++++-
 2 files changed, 461 insertions(+), 2 deletions(-)

diff --git a/drivers/net/virtio/qtest_utils.c b/drivers/net/virtio/qtest_utils.c
index f4cd6af..000c7e8 100644
--- a/drivers/net/virtio/qtest_utils.c
+++ b/drivers/net/virtio/qtest_utils.c
@@ -43,6 +43,10 @@
 #include "virtio_ethdev.h"
 #include "qtest_utils.h"
 
+#define PCI_CONFIG_ADDR(_bus, _device, _function, _offset) ( \
+	(1 << 31) | ((_bus) & 0xff) << 16 | ((_device) & 0x1f) << 11 | \
+	((_function) & 0x7) << 8 | ((_offset) & 0xfc))
+
 union qtest_pipefds {
 	struct {
 		int pipefd[2];
@@ -57,6 +61,8 @@ struct qtest_session {
 	int qtest_socket;
 	pthread_mutex_t qtest_session_lock;
 
+	struct qtest_pci_device_list head;
+
 	pthread_t event_th;
 	int event_th_started;
 	char *evq;
@@ -195,6 +201,119 @@ qtest_raw_write(struct qtest_session *s, uint64_t addr, uint32_t val, char type)
 }
 
 /*
+ * qtest_pci_inX/outX are used for accessing PCI configuration space.
+ * The functions are implemented based on PCI configuration space
+ * specification.
+ * Accroding to the spec, access size of read()/write() should be 4 bytes.
+ */
+static int
+qtest_pci_inb(struct qtest_session *s, uint8_t bus, uint8_t device,
+		uint8_t function, uint8_t offset)
+{
+	uint32_t tmp;
+
+	tmp = PCI_CONFIG_ADDR(bus, device, function, offset);
+
+	if (pthread_mutex_lock(&s->qtest_session_lock) < 0)
+		rte_panic("Cannot lock mutex\n");
+
+	qtest_raw_out(s, 0xcf8, tmp, 'l');
+	tmp = qtest_raw_in(s, 0xcfc, 'l');
+
+	if (pthread_mutex_unlock(&s->qtest_session_lock) < 0)
+		rte_panic("Cannot unlock mutex\n");
+
+	return (tmp >> ((offset & 0x3) * 8)) & 0xff;
+}
+
+static uint32_t
+qtest_pci_inl(struct qtest_session *s, uint8_t bus, uint8_t device,
+		uint8_t function, uint8_t offset)
+{
+	uint32_t tmp;
+
+	tmp = PCI_CONFIG_ADDR(bus, device, function, offset);
+
+	if (pthread_mutex_lock(&s->qtest_session_lock) < 0)
+		rte_panic("Cannot lock mutex\n");
+
+	qtest_raw_out(s, 0xcf8, tmp, 'l');
+	tmp = qtest_raw_in(s, 0xcfc, 'l');
+
+	if (pthread_mutex_unlock(&s->qtest_session_lock) < 0)
+		rte_panic("Cannot unlock mutex\n");
+
+	return tmp;
+}
+
+static void
+qtest_pci_outl(struct qtest_session *s, uint8_t bus, uint8_t device,
+		uint8_t function, uint8_t offset, uint32_t value)
+{
+	uint32_t tmp;
+
+	tmp = PCI_CONFIG_ADDR(bus, device, function, offset);
+
+	if (pthread_mutex_lock(&s->qtest_session_lock) < 0)
+		rte_panic("Cannot lock mutex\n");
+
+	qtest_raw_out(s, 0xcf8, tmp, 'l');
+	qtest_raw_out(s, 0xcfc, value, 'l');
+
+	if (pthread_mutex_unlock(&s->qtest_session_lock) < 0)
+		rte_panic("Cannot unlock mutex\n");
+}
+
+static uint64_t
+qtest_pci_inq(struct qtest_session *s, uint8_t bus, uint8_t device,
+		uint8_t function, uint8_t offset)
+{
+	uint32_t tmp;
+	uint64_t val;
+
+	tmp = PCI_CONFIG_ADDR(bus, device, function, offset);
+
+	if (pthread_mutex_lock(&s->qtest_session_lock) < 0)
+		rte_panic("Cannot lock mutex\n");
+
+	qtest_raw_out(s, 0xcf8, tmp, 'l');
+	val = (uint64_t)qtest_raw_in(s, 0xcfc, 'l');
+
+	tmp = PCI_CONFIG_ADDR(bus, device, function, offset + 4);
+
+	qtest_raw_out(s, 0xcf8, tmp, 'l');
+	val |= (uint64_t)qtest_raw_in(s, 0xcfc, 'l') << 32;
+
+	if (pthread_mutex_unlock(&s->qtest_session_lock) < 0)
+		rte_panic("Cannot unlock mutex\n");
+
+	return val;
+}
+
+static void
+qtest_pci_outq(struct qtest_session *s, uint8_t bus, uint8_t device,
+		uint8_t function, uint8_t offset, uint64_t value)
+{
+	uint32_t tmp;
+
+	tmp = PCI_CONFIG_ADDR(bus, device, function, offset);
+
+	if (pthread_mutex_lock(&s->qtest_session_lock) < 0)
+		rte_panic("Cannot lock mutex\n");
+
+	qtest_raw_out(s, 0xcf8, tmp, 'l');
+	qtest_raw_out(s, 0xcfc, (uint32_t)(value & 0xffffffff), 'l');
+
+	tmp = PCI_CONFIG_ADDR(bus, device, function, offset + 4);
+
+	qtest_raw_out(s, 0xcf8, tmp, 'l');
+	qtest_raw_out(s, 0xcfc, (uint32_t)(value >> 32), 'l');
+
+	if (pthread_mutex_unlock(&s->qtest_session_lock) < 0)
+		rte_panic("Cannot unlock mutex\n");
+}
+
+/*
  * qtest_in/out are used for accessing ioport of qemu guest.
  * qtest_read/write are used for accessing memory of qemu guest.
  */
@@ -254,6 +373,18 @@ qtest_write(struct qtest_session *s, uint64_t addr, uint64_t val, char type)
 		rte_panic("Cannot lock mutex\n");
 }
 
+static struct qtest_pci_device *
+qtest_find_device(struct qtest_session *s, const char *name)
+{
+	struct qtest_pci_device *dev;
+
+	TAILQ_FOREACH(dev, &s->head, next) {
+		if (strcmp(dev->name, name) == 0)
+			return dev;
+	}
+	return NULL;
+}
+
 static void
 qtest_event_send(struct qtest_session *s, char *buf)
 {
@@ -382,6 +513,208 @@ qtest_event_handler(void *data) {
 	return NULL;
 }
 
+/*
+ * Common initialization of PCI device.
+ * To know detail, see pci specification.
+ */
+int
+qtest_init_pci_device(struct qtest_session *s, struct qtest_pci_device *dev)
+{
+	uint8_t i, bus, device;
+	uint32_t val;
+	uint64_t val64;
+
+	bus = dev->bus_addr;
+	device = dev->device_addr;
+
+	PMD_DRV_LOG(INFO,
+		"Find %s on virtual PCI bus: %04x:%02x:00.0\n",
+		dev->name, bus, device);
+
+	/* Check header type */
+	val = qtest_pci_inb(s, bus, device, 0, PCI_HEADER_TYPE);
+	if (val != PCI_HEADER_TYPE_NORMAL) {
+		PMD_DRV_LOG(ERR, "Unexpected header type %d\n", val);
+		return -1;
+	}
+
+	/* Check BAR type */
+	for (i = 0; i < NB_BAR; i++) {
+		val = qtest_pci_inl(s, bus, device, 0, dev->bar[i].addr);
+
+		switch (dev->bar[i].type) {
+		case QTEST_PCI_BAR_IO:
+			if ((val & 0x1) != PCI_BASE_ADDRESS_SPACE_IO)
+				dev->bar[i].type = QTEST_PCI_BAR_DISABLE;
+			break;
+		case QTEST_PCI_BAR_MEMORY_UNDER_1MB:
+			if ((val & 0x1) != PCI_BASE_ADDRESS_SPACE_MEMORY)
+				dev->bar[i].type = QTEST_PCI_BAR_DISABLE;
+			if ((val & 0x6) != PCI_BASE_ADDRESS_MEM_TYPE_1M)
+				dev->bar[i].type = QTEST_PCI_BAR_DISABLE;
+			break;
+		case QTEST_PCI_BAR_MEMORY_32:
+			if ((val & 0x1) != PCI_BASE_ADDRESS_SPACE_MEMORY)
+				dev->bar[i].type = QTEST_PCI_BAR_DISABLE;
+			if ((val & 0x6) != PCI_BASE_ADDRESS_MEM_TYPE_32)
+				dev->bar[i].type = QTEST_PCI_BAR_DISABLE;
+			break;
+		case QTEST_PCI_BAR_MEMORY_64:
+
+			if ((val & 0x1) != PCI_BASE_ADDRESS_SPACE_MEMORY)
+				dev->bar[i].type = QTEST_PCI_BAR_DISABLE;
+			if ((val & 0x6) != PCI_BASE_ADDRESS_MEM_TYPE_64)
+				dev->bar[i].type = QTEST_PCI_BAR_DISABLE;
+			break;
+		case QTEST_PCI_BAR_DISABLE:
+			break;
+		}
+	}
+
+	/* Enable device */
+	val = qtest_pci_inl(s, bus, device, 0, PCI_COMMAND);
+	val |= PCI_COMMAND_IO | PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER;
+	qtest_pci_outl(s, bus, device, 0, PCI_COMMAND, val);
+
+	/* Calculate BAR size */
+	for (i = 0; i < NB_BAR; i++) {
+		switch (dev->bar[i].type) {
+		case QTEST_PCI_BAR_IO:
+		case QTEST_PCI_BAR_MEMORY_UNDER_1MB:
+		case QTEST_PCI_BAR_MEMORY_32:
+			qtest_pci_outl(s, bus, device, 0,
+					dev->bar[i].addr, 0xffffffff);
+			val = qtest_pci_inl(s, bus, device,
+					0, dev->bar[i].addr);
+			dev->bar[i].region_size = ~(val & 0xfffffff0) + 1;
+			break;
+		case QTEST_PCI_BAR_MEMORY_64:
+			qtest_pci_outq(s, bus, device, 0,
+					dev->bar[i].addr, 0xffffffffffffffff);
+			val64 = qtest_pci_inq(s, bus, device,
+					0, dev->bar[i].addr);
+			dev->bar[i].region_size =
+					~(val64 & 0xfffffffffffffff0) + 1;
+			break;
+		case QTEST_PCI_BAR_DISABLE:
+			break;
+		}
+	}
+
+	/* Set BAR region */
+	for (i = 0; i < NB_BAR; i++) {
+		switch (dev->bar[i].type) {
+		case QTEST_PCI_BAR_IO:
+		case QTEST_PCI_BAR_MEMORY_UNDER_1MB:
+		case QTEST_PCI_BAR_MEMORY_32:
+			qtest_pci_outl(s, bus, device, 0, dev->bar[i].addr,
+				dev->bar[i].region_start);
+			PMD_DRV_LOG(INFO, "Set BAR of %s device: 0x%lx - 0x%lx\n",
+				dev->name, dev->bar[i].region_start,
+				dev->bar[i].region_start + dev->bar[i].region_size);
+			break;
+		case QTEST_PCI_BAR_MEMORY_64:
+			qtest_pci_outq(s, bus, device, 0, dev->bar[i].addr,
+				dev->bar[i].region_start);
+			PMD_DRV_LOG(INFO, "Set BAR of %s device: 0x%lx - 0x%lx\n",
+				dev->name, dev->bar[i].region_start,
+				dev->bar[i].region_start + dev->bar[i].region_size);
+			break;
+		case QTEST_PCI_BAR_DISABLE:
+			break;
+		}
+	}
+
+	return 0;
+}
+
+static int
+qtest_find_pci_device(struct qtest_session *s, const char *name)
+{
+	struct qtest_pci_device *dev;
+	struct rte_pci_addr *addr;
+	uint32_t val;
+
+	dev = qtest_find_device(s, name);
+	if (dev == NULL)
+		goto not_found;
+
+	addr = &dev->specified_addr;
+	PMD_DRV_LOG(INFO, "PCI address of %s is %04x:%02x:%02x.%02x\n", name,
+			addr->domain, addr->bus, addr->devid, addr->function);
+
+	val = qtest_pci_inl(s, addr->bus, addr->devid, addr->function, 0);
+	if (val == ((uint32_t)dev->device_id << 16 | dev->vendor_id)) {
+		dev->bus_addr = addr->bus;
+		dev->device_addr = addr->devid;
+		return 0;
+	}
+
+not_found:
+	PMD_DRV_LOG(ERR, "%s isn' found\n", name);
+	return -1;
+}
+
+static int
+qtest_init_pci_devices(struct qtest_session *s,
+		struct qtest_pci_device *devices, int devnum)
+{
+	struct qtest_pci_device *dev;
+	int i, ret;
+
+
+	/* Try to find devices */
+	for (i = 0; i < devnum; i++) {
+		ret = qtest_find_pci_device(s, devices[i].name);
+		if (ret < 0)
+			return -1;
+	}
+
+	/* Initialize devices */
+	TAILQ_FOREACH(dev, &s->head, next) {
+		ret = dev->init(s, dev);
+		if (ret != 0)
+			return ret;
+	}
+
+	return 0;
+}
+
+static void
+qtest_remove_target_devices(struct qtest_session *s)
+{
+	struct qtest_pci_device *dev, *next;
+
+	for (dev = TAILQ_FIRST(&s->head); dev != NULL; dev = next) {
+		next = TAILQ_NEXT(dev, next);
+		TAILQ_REMOVE(&s->head, dev, next);
+		free(dev);
+	}
+}
+
+static int
+qtest_register_target_devices(struct qtest_session *s,
+		struct qtest_pci_device *devices, int devnum)
+{
+	struct qtest_pci_device *device;
+	int i;
+
+	TAILQ_INIT(&s->head);
+
+	for (i = 0; i < devnum; i++) {
+		device = malloc(sizeof(*device));
+		if (device == NULL) {
+			qtest_remove_target_devices(s);
+			return -1;
+		}
+
+		*device = devices[i];
+		TAILQ_INSERT_TAIL(&s->head, device, next);
+	}
+
+	return 0;
+}
+
 static int
 qtest_open_socket(char *path)
 {
@@ -431,11 +764,13 @@ qtest_vdev_uninit(struct qtest_session *s)
 	}
 
 	pthread_mutex_destroy(&s->qtest_session_lock);
+	qtest_remove_target_devices(s);
 	rte_free(s);
 }
 
 struct qtest_session *
-qtest_vdev_init(char *qtest_path)
+qtest_vdev_init(char *qtest_path,
+		struct qtest_pci_device *devices, int devnum)
 {
 	struct qtest_session *s;
 	int ret;
@@ -459,6 +794,12 @@ qtest_vdev_init(char *qtest_path)
 		goto error;
 	}
 
+	ret = qtest_register_target_devices(s, devices, devnum);
+	if (ret != 0) {
+		PMD_DRV_LOG(ERR, "Failed to initialize qtest session\n");
+		goto error;
+	}
+
 	s->qtest_socket = qtest_open_socket(qtest_path);
 	if (s->qtest_socket < 0) {
 		PMD_DRV_LOG(ERR, "Failed to open %s\n", qtest_path);
@@ -472,6 +813,12 @@ qtest_vdev_init(char *qtest_path)
 	}
 	s->event_th_started = 1;
 
+	ret = qtest_init_pci_devices(s, devices, devnum);
+	if (ret != 0) {
+		PMD_DRV_LOG(ERR, "Failed to initialize devices\n");
+		goto error;
+	}
+
 	return s;
 
 error:
diff --git a/drivers/net/virtio/qtest_utils.h b/drivers/net/virtio/qtest_utils.h
index 962fc5c..ba70754 100644
--- a/drivers/net/virtio/qtest_utils.h
+++ b/drivers/net/virtio/qtest_utils.h
@@ -34,16 +34,114 @@
 #ifndef _VIRTIO_QTEST_UTILS_H_
 #define _VIRTIO_QTEST_UTILS_H_
 
+#include <sys/queue.h>
+#include <linux/pci_regs.h>
+
+#define NB_BAR                          6
+
+/*
+ * QTest utilities
+ *
+ * This utility assumes QTest guest will have below 3 pci devices.
+ * - piix3
+ *    It will be used for enabling interrupts from target device.
+ * - ivshmme
+ *    It will be used for enabling shared memory between guest and DPDK PMD.
+ * - target device
+ *    It will be the device DPDK PMD wants to use.
+ *    So far, virtio-net device is the only use case.
+ *
+ * To use the utilities, DPDK PMD needs to define above device information.
+ * Then call qtest_vdev_init().
+ * To handle multiple target devices in one QEMU guest, piix3 handling should
+ * be changed.
+ */
+
+enum qtest_pci_bar_type {
+	QTEST_PCI_BAR_DISABLE = 0,
+	QTEST_PCI_BAR_IO,
+	QTEST_PCI_BAR_MEMORY_UNDER_1MB,
+	QTEST_PCI_BAR_MEMORY_32,
+	QTEST_PCI_BAR_MEMORY_64
+};
+
+/*
+ * A structure used to specify BAR information.
+ *
+ * - type
+ *    Specify type of this device.
+ * - addr
+ *    Specify one of PCI_BASE_ADDRESS_0/../5.
+ * - region_start
+ *    Specify physical address of this device. Because a guest cpu will access
+ *    this device using the address, this address should not be over lapped by
+ *    others.
+ * - region_size
+ *    Will be filled by QTest utility while initializing the device.
+ */
+struct qtest_pci_bar {
+	enum qtest_pci_bar_type type;
+	uint8_t addr;
+	uint64_t region_start;
+	uint64_t region_size;
+};
+
+struct qtest_session;
+
+/*
+ * A structure used to specify pci device information.
+ *
+ * - name
+ *    Specify name of this device.
+ * - device_id
+ *    Specify device id of this device.
+ * - vendor_id
+ *    Specify vendor id of this device.
+ * - bus_addr
+ *    Will be filled by QTest utility.
+ *    It will be bus address of this device.
+ * - device_addr
+ *    Will be filled by QTest utility.
+ *    It will be device address of this device.
+ * - bar
+ *    Specify bar structure for this device.
+ * - specified_addr
+ *    Specify pci address of this device.
+ *    QTest utility will not check any other pci address for this device.
+ *    If it's wrong, device initialization will be failed.
+ * - init
+ *   Specify initialization function.
+ *   If the device is generic device, just specify qtest_init_pci_device().
+ */
+TAILQ_HEAD(qtest_pci_device_list, qtest_pci_device);
+struct qtest_pci_device {
+	TAILQ_ENTRY(qtest_pci_device) next;
+	const char *name;
+	uint16_t device_id;
+	uint16_t vendor_id;
+	uint8_t bus_addr;
+	uint8_t device_addr;
+	struct qtest_pci_bar bar[NB_BAR];
+	struct rte_pci_addr specified_addr;
+	int (*init)(struct qtest_session *s, struct qtest_pci_device *dev);
+};
+
 /**
  * @internal
  * Initialization function of QTest utility.
  *
  * @param qtest_path
  *   Path of qtest socket.
+ * @param devices
+ *   Array of device information. It should contain piix3, ivshmem and target
+ *   device(virtio-net device).
+ * @param devnum
+ *   The number of device information.
  * @return
  *   The pointer to qtest session structure.
  */
-struct qtest_session *qtest_vdev_init(char *qtest_path);
+struct qtest_session *qtest_vdev_init(char *qtest_path,
+		struct qtest_pci_device *devices, int devnum);
 
 /**
  * @internal
@@ -116,4 +214,18 @@ uint32_t qtest_read(struct qtest_session *s, uint64_t addr, char type);
 void qtest_write(struct qtest_session *s, uint64_t addr,
 			uint64_t val, char type);
 
+/**
+ * @internal
+ * Initialization function of general device.
+ *
+ * @param s
+ *   The pointer to qtest session structure.
+ * @param dev
+ *   The pointer of pci device.
+ * @return
+ *   0 on success, negative on error
+ */
+int qtest_init_pci_device(struct qtest_session *s,
+			struct qtest_pci_device *dev);
+
 #endif /* _VIRTIO_QTEST_UTILS_H_ */
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 120+ messages in thread

* [PATCH v4 07/12] virtio, qtest: Add functionality to share memory between QTest guest
  2016-02-22  8:17       ` [PATCH v3 3/6] EAL: Add new EAL "--range-virtaddr" option Tetsuya Mukawa
                           ` (7 preceding siblings ...)
  2016-03-09  8:33         ` [PATCH v4 06/12] virtio, qtest: Add pci device initialization function to qtest utils Tetsuya Mukawa
@ 2016-03-09  8:33         ` Tetsuya Mukawa
  2016-03-09  8:33         ` [PATCH v4 08/12] virtio, qtest: Add functionality to handle interrupt Tetsuya Mukawa
                           ` (4 subsequent siblings)
  13 siblings, 0 replies; 120+ messages in thread
From: Tetsuya Mukawa @ 2016-03-09  8:33 UTC (permalink / raw)
  To: dev, jianfeng.tan, huawei.xie, yuanhan.liu

The patch adds functionality to share memory between QTest guest and
DPDK application using ivshmem device.
The shared memory will be all EAL memory on hugepages. This memory will
be accessed by QEMU vcpu and DPDK application using same address.

Signed-off-by: Tetsuya Mukawa <mukawa@igel.co.jp>
---
 drivers/net/virtio/qtest_utils.c | 106 ++++++++++++++++++++++++++++++++++++++-
 drivers/net/virtio/qtest_utils.h |   4 +-
 2 files changed, 108 insertions(+), 2 deletions(-)

diff --git a/drivers/net/virtio/qtest_utils.c b/drivers/net/virtio/qtest_utils.c
index 000c7e8..338224a 100644
--- a/drivers/net/virtio/qtest_utils.c
+++ b/drivers/net/virtio/qtest_utils.c
@@ -43,6 +43,9 @@
 #include "virtio_ethdev.h"
 #include "qtest_utils.h"
 
+/* ivshmem configuration */
+#define IVSHMEM_PROTOCOL_VERSION        0
+
 #define PCI_CONFIG_ADDR(_bus, _device, _function, _offset) ( \
 	(1 << 31) | ((_bus) & 0xff) << 16 | ((_device) & 0x1f) << 11 | \
 	((_function) & 0x7) << 8 | ((_offset) & 0xfc))
@@ -59,6 +62,7 @@ union qtest_pipefds {
 
 struct qtest_session {
 	int qtest_socket;
+	int ivshmem_socket;
 	pthread_mutex_t qtest_session_lock;
 
 	struct qtest_pci_device_list head;
@@ -411,6 +415,7 @@ qtest_close_sockets(struct qtest_session *s)
 	qtest_close_one_socket(&s->qtest_socket);
 	qtest_close_one_socket(&s->msgfds.readfd);
 	qtest_close_one_socket(&s->msgfds.writefd);
+	qtest_close_one_socket(&s->ivshmem_socket);
 }
 
 static void
@@ -716,6 +721,93 @@ qtest_register_target_devices(struct qtest_session *s,
 }
 
 static int
+qtest_send_message_to_ivshmem(int sock_fd, uint64_t client_id, int shm_fd)
+{
+	struct iovec iov;
+	struct msghdr msgh;
+	size_t fdsize = sizeof(int);
+	char control[CMSG_SPACE(fdsize)];
+	struct cmsghdr *cmsg;
+	int ret;
+
+	memset(&msgh, 0, sizeof(msgh));
+	iov.iov_base = &client_id;
+	iov.iov_len = sizeof(client_id);
+
+	msgh.msg_iov = &iov;
+	msgh.msg_iovlen = 1;
+
+	if (shm_fd >= 0) {
+		msgh.msg_control = &control;
+		msgh.msg_controllen = sizeof(control);
+		cmsg = CMSG_FIRSTHDR(&msgh);
+		cmsg->cmsg_len = CMSG_LEN(fdsize);
+		cmsg->cmsg_level = SOL_SOCKET;
+		cmsg->cmsg_type = SCM_RIGHTS;
+		memcpy(CMSG_DATA(cmsg), &shm_fd, fdsize);
+	}
+
+	do {
+		ret = sendmsg(sock_fd, &msgh, 0);
+	} while (ret < 0 && errno == EINTR);
+
+	if (ret < 0) {
+		PMD_DRV_LOG(ERR, "sendmsg error\n");
+		return ret;
+	}
+
+	return ret;
+}
+
+static int
+qtest_setup_shared_memory(struct qtest_session *s)
+{
+	int shm_fd, num, ret;
+	struct back_file *huges;
+
+	num = rte_eal_get_backfile_info(&huges);
+	if (num != 1) {
+		PMD_DRV_LOG(ERR,
+			"Not supported memory configuration\n");
+		return -1;
+	}
+
+	shm_fd = open(huges[0].filepath, O_RDWR);
+	if (shm_fd < 0) {
+		PMD_DRV_LOG(ERR,
+			"Cannot open file: %s\n", huges[0].filepath);
+		return -1;
+	}
+
+	/* send our protocol version first */
+	ret = qtest_send_message_to_ivshmem(s->ivshmem_socket,
+			IVSHMEM_PROTOCOL_VERSION, -1);
+	if (ret < 0) {
+		PMD_DRV_LOG(ERR,
+			"Failed to send protocol version to ivshmem\n");
+		return -1;
+	}
+
+	/* send client id */
+	ret = qtest_send_message_to_ivshmem(s->ivshmem_socket, 0, -1);
+	if (ret < 0) {
+		PMD_DRV_LOG(ERR, "Failed to send VMID to ivshmem\n");
+		return -1;
+	}
+
+	/* send message to ivshmem */
+	ret = qtest_send_message_to_ivshmem(s->ivshmem_socket, -1, shm_fd);
+	if (ret < 0) {
+		PMD_DRV_LOG(ERR, "Failed to file descriptor to ivshmem\n");
+		return -1;
+	}
+
+	close(shm_fd);
+
+	return 0;
+}
+
+static int
 qtest_open_socket(char *path)
 {
 	struct sockaddr_un sa = {0};
@@ -769,7 +861,7 @@ qtest_vdev_uninit(struct qtest_session *s)
 }
 
 struct qtest_session *
-qtest_vdev_init(char *qtest_path,
+qtest_vdev_init(char *qtest_path, char *ivshmem_path,
 		struct qtest_pci_device *devices, int devnum)
 {
 	struct qtest_session *s;
@@ -800,6 +892,12 @@ qtest_vdev_init(char *qtest_path,
 		goto error;
 	}
 
+	s->ivshmem_socket = qtest_open_socket(ivshmem_path);
+	if (s->ivshmem_socket < 0) {
+		PMD_DRV_LOG(ERR, "Failed to open %s\n", ivshmem_path);
+		goto error;
+	}
+
 	s->qtest_socket = qtest_open_socket(qtest_path);
 	if (s->qtest_socket < 0) {
 		PMD_DRV_LOG(ERR, "Failed to open %s\n", qtest_path);
@@ -813,6 +911,12 @@ qtest_vdev_init(char *qtest_path,
 	}
 	s->event_th_started = 1;
 
+	ret = qtest_setup_shared_memory(s);
+	if (ret != 0) {
+		PMD_DRV_LOG(ERR, "Failed to setup shared memory\n");
+		goto error;
+	}
+
 	ret = qtest_init_pci_devices(s, devices, devnum);
 	if (ret != 0) {
 		PMD_DRV_LOG(ERR, "Failed to initialize devices\n");
diff --git a/drivers/net/virtio/qtest_utils.h b/drivers/net/virtio/qtest_utils.h
index ba70754..26994b1 100644
--- a/drivers/net/virtio/qtest_utils.h
+++ b/drivers/net/virtio/qtest_utils.h
@@ -132,6 +132,8 @@ struct qtest_pci_device {
  *
  * @param qtest_path
  *   Path of qtest socket.
+ * @param ivshmem_path
+ *   Path of ivshmem socket.
  * @param devices
  *   Array of device information. It should contain piix3, ivshmem and target
  *   device(virtio-net device).
@@ -140,7 +142,7 @@ struct qtest_pci_device {
  * @return
  *   The pointer to qtest session structure.
  */
-struct qtest_session *qtest_vdev_init(char *qtest_path,
+struct qtest_session *qtest_vdev_init(char *qtest_path, char *ivshmem_path,
 		struct qtest_pci_device *devices, int devnum);
 
 /**
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 120+ messages in thread

* [PATCH v4 08/12] virtio, qtest: Add functionality to handle interrupt
  2016-02-22  8:17       ` [PATCH v3 3/6] EAL: Add new EAL "--range-virtaddr" option Tetsuya Mukawa
                           ` (8 preceding siblings ...)
  2016-03-09  8:33         ` [PATCH v4 07/12] virtio, qtest: Add functionality to share memory between QTest guest Tetsuya Mukawa
@ 2016-03-09  8:33         ` Tetsuya Mukawa
  2016-03-09  8:33         ` [PATCH v4 09/12] virtio, qtest: Add misc functions to handle pci information Tetsuya Mukawa
                           ` (3 subsequent siblings)
  13 siblings, 0 replies; 120+ messages in thread
From: Tetsuya Mukawa @ 2016-03-09  8:33 UTC (permalink / raw)
  To: dev, jianfeng.tan, huawei.xie, yuanhan.liu

The patch adds functionality to handle interrupt from pci device of
QEMU guest. To handle the interrupts, the patch adds to initialize piix3
pci device.

Signed-off-by: Tetsuya Mukawa <mukawa@igel.co.jp>
---
 drivers/net/virtio/qtest_utils.c | 225 ++++++++++++++++++++++++++++++++++++++-
 drivers/net/virtio/qtest_utils.h |  68 +++++++++++-
 2 files changed, 287 insertions(+), 6 deletions(-)

diff --git a/drivers/net/virtio/qtest_utils.c b/drivers/net/virtio/qtest_utils.c
index 338224a..337546a 100644
--- a/drivers/net/virtio/qtest_utils.c
+++ b/drivers/net/virtio/qtest_utils.c
@@ -36,6 +36,7 @@
 #include <sys/un.h>
 #include <pthread.h>
 #include <fcntl.h>
+#include <sys/eventfd.h>
 
 #include <rte_malloc.h>
 
@@ -43,6 +44,12 @@
 #include "virtio_ethdev.h"
 #include "qtest_utils.h"
 
+/* PIIX3 configuration registers */
+#define PIIX3_REG_ADDR_PIRQA            0x60
+#define PIIX3_REG_ADDR_PIRQB            0x61
+#define PIIX3_REG_ADDR_PIRQC            0x62
+#define PIIX3_REG_ADDR_PIRQD            0x63
+
 /* ivshmem configuration */
 #define IVSHMEM_PROTOCOL_VERSION        0
 
@@ -74,6 +81,14 @@ struct qtest_session {
 	size_t evq_total_len;
 
 	union qtest_pipefds msgfds;
+
+	int irqno;
+	pthread_t intr_th;
+	int intr_th_started;
+	int eventfd;
+	rte_atomic16_t enable_intr;
+	rte_intr_callback_fn cb;
+	void *cb_arg;
 };
 
 static int
@@ -230,6 +245,29 @@ qtest_pci_inb(struct qtest_session *s, uint8_t bus, uint8_t device,
 	return (tmp >> ((offset & 0x3) * 8)) & 0xff;
 }
 
+static void
+qtest_pci_outb(struct qtest_session *s, uint8_t bus, uint8_t device,
+		uint8_t function, uint8_t offset, uint8_t value)
+{
+	uint32_t addr, tmp, pos;
+
+	addr = PCI_CONFIG_ADDR(bus, device, function, offset);
+	pos = (offset % 4) * 8;
+
+	if (pthread_mutex_lock(&s->qtest_session_lock) < 0)
+		rte_panic("Cannot lock mutex\n");
+
+	qtest_raw_out(s, 0xcf8, addr, 'l');
+	tmp = qtest_raw_in(s, 0xcfc, 'l');
+	tmp = (tmp & ~(0xff << pos)) | (value << pos);
+
+	qtest_raw_out(s, 0xcf8, addr, 'l');
+	qtest_raw_out(s, 0xcfc, tmp, 'l');
+
+	if (pthread_mutex_unlock(&s->qtest_session_lock) < 0)
+		rte_panic("Cannot unlock mutex\n");
+}
+
 static uint32_t
 qtest_pci_inl(struct qtest_session *s, uint8_t bus, uint8_t device,
 		uint8_t function, uint8_t offset)
@@ -389,15 +427,112 @@ qtest_find_device(struct qtest_session *s, const char *name)
 	return NULL;
 }
 
+int
+qtest_intr_enable(struct qtest_session *s)
+{
+	rte_atomic16_set(&s->enable_intr, 1);
+
+	return 0;
+}
+
+int
+qtest_intr_disable(struct qtest_session *s)
+{
+	rte_atomic16_set(&s->enable_intr, 0);
+
+	return 0;
+}
+
+void
+qtest_intr_callback_register(struct qtest_session *s,
+		rte_intr_callback_fn cb, void *cb_arg)
+{
+	s->cb = cb;
+	s->cb_arg = cb_arg;
+	rte_atomic16_set(&s->enable_intr, 1);
+}
+
+void
+qtest_intr_callback_unregister(struct qtest_session *s,
+		rte_intr_callback_fn cb __rte_unused,
+		void *cb_arg __rte_unused)
+{
+	rte_atomic16_set(&s->enable_intr, 0);
+	s->cb = NULL;
+	s->cb_arg = NULL;
+}
+
+static void *
+qtest_intr_handler(void *data) {
+	struct qtest_session *s = (struct qtest_session *)data;
+	eventfd_t value;
+	int ret;
+
+	for (;;) {
+		ret = eventfd_read(s->eventfd, &value);
+		if (ret < 0)
+			return NULL;
+		s->cb(NULL, s->cb_arg);
+	}
+	return NULL;
+}
+
+static int
+qtest_intr_initialize(struct qtest_session *s)
+{
+	char buf[64];
+	int ret;
+
+	snprintf(buf, sizeof(buf), "irq_intercept_in ioapic\n");
+
+	if (pthread_mutex_lock(&s->qtest_session_lock) < 0)
+		rte_panic("Cannot lock mutex\n");
+
+	/* To enable interrupt, send "irq_intercept_in" message to QEMU */
+	ret = qtest_raw_send(s->qtest_socket, buf, strlen(buf));
+	if (ret < 0) {
+		pthread_mutex_unlock(&s->qtest_session_lock);
+		return -1;
+	}
+
+	/* just ignore QEMU response */
+	ret = qtest_raw_recv(s->msgfds.readfd, buf, sizeof(buf));
+	if (ret < 0) {
+		pthread_mutex_unlock(&s->qtest_session_lock);
+		return -1;
+	}
+
+	if (pthread_mutex_unlock(&s->qtest_session_lock) < 0)
+		rte_panic("Cannot lock mutex\n");
+
+	return 0;
+}
+
 static void
 qtest_event_send(struct qtest_session *s, char *buf)
 {
+	char interrupt_message[32];
 	int ret;
 
-	/* relay normal message to pipe */
-	ret = qtest_raw_send(s->msgfds.writefd, buf, strlen(buf));
-	if (ret < 0)
-		rte_panic("cannot relay normal message\n");
+	/* This message will come when interrupt occurs */
+	snprintf(interrupt_message, sizeof(interrupt_message),
+			"IRQ raise %d", s->irqno);
+
+	if (strncmp(buf, interrupt_message,
+				strlen(interrupt_message)) == 0) {
+		if (rte_atomic16_read(&s->enable_intr) == 0)
+			return;
+
+		/* relay interrupt to eventfd */
+		ret = eventfd_write(s->eventfd, 1);
+		if (ret < 0)
+			rte_panic("cannot relay interrupt\n");
+	} else {
+		/* relay normal message to pipe */
+		ret = qtest_raw_send(s->msgfds.writefd, buf, strlen(buf));
+		if (ret < 0)
+			rte_panic("cannot relay normal message\n");
+	}
 }
 
 static void
@@ -415,6 +550,7 @@ qtest_close_sockets(struct qtest_session *s)
 	qtest_close_one_socket(&s->qtest_socket);
 	qtest_close_one_socket(&s->msgfds.readfd);
 	qtest_close_one_socket(&s->msgfds.writefd);
+	qtest_close_one_socket(&s->eventfd);
 	qtest_close_one_socket(&s->ivshmem_socket);
 }
 
@@ -518,6 +654,57 @@ qtest_event_handler(void *data) {
 	return NULL;
 }
 
+/* This function should be fixed when multiple target devices are supported */
+int
+qtest_init_piix3_device(struct qtest_session *s, struct qtest_pci_device *dev)
+{
+	uint8_t bus, device, slot = 0;
+	struct qtest_pci_device *tmpdev;
+	uint8_t pcislot2regaddr[] = {	0xff,
+					0xff,
+					0xff,
+					PIIX3_REG_ADDR_PIRQC,
+					PIIX3_REG_ADDR_PIRQD,
+					PIIX3_REG_ADDR_PIRQA,
+					PIIX3_REG_ADDR_PIRQB};
+
+	bus = dev->bus_addr;
+	device = dev->device_addr;
+
+	PMD_DRV_LOG(INFO,
+		"Find %s on virtual PCI bus: %04x:%02x:00.0\n",
+		dev->name, bus, device);
+
+	/* Get slot id that is connected to target device(virtio-net device) */
+	TAILQ_FOREACH(tmpdev, &s->head, next) {
+		if (strcmp(tmpdev->name, "piix3") != 0 &&
+				strcmp(tmpdev->name, "ivshmem") != 0) {
+			slot = tmpdev->device_addr;
+			break;
+		}
+	}
+
+	if (slot == 0)
+		return -1;
+
+	/*
+	 * Set interrupt routing for target device.
+	 * Here is i440fx/piix3 connection settings
+	 * ---------------------------------------
+	 * PCI Slot3 -> PIRQC
+	 * PCI Slot4 -> PIRQD
+	 * PCI Slot5 -> PIRQA
+	 * PCI Slot6 -> PIRQB
+	 */
+	if (pcislot2regaddr[slot] != 0xff) {
+		qtest_pci_outb(s, bus, device, 0,
+				pcislot2regaddr[slot],
+				s->irqno);
+	}
+
+	return 0;
+}
+
 /*
  * Common initialization of PCI device.
  * To know detail, see pci specification.
@@ -855,6 +1042,12 @@ qtest_vdev_uninit(struct qtest_session *s)
 		s->event_th_started = 0;
 	}
 
+	if (s->intr_th_started) {
+		pthread_cancel(s->intr_th);
+		pthread_join(s->intr_th, NULL);
+		s->intr_th_started = 0;
+	}
+
 	pthread_mutex_destroy(&s->qtest_session_lock);
 	qtest_remove_target_devices(s);
 	rte_free(s);
@@ -862,7 +1055,7 @@ qtest_vdev_uninit(struct qtest_session *s)
 
 struct qtest_session *
 qtest_vdev_init(char *qtest_path, char *ivshmem_path,
-		struct qtest_pci_device *devices, int devnum)
+		int irqno, struct qtest_pci_device *devices, int devnum)
 {
 	struct qtest_session *s;
 	int ret;
@@ -886,12 +1079,21 @@ qtest_vdev_init(char *qtest_path, char *ivshmem_path,
 		goto error;
 	}
 
+	s->eventfd = eventfd(0, 0);
+	if (s->eventfd < 0) {
+		PMD_DRV_LOG(ERR, "Failed to open eventfd\n");
+		goto error;
+	}
+
 	ret = qtest_register_target_devices(s, devices, devnum);
 	if (ret != 0) {
 		PMD_DRV_LOG(ERR, "Failed to initialize qtest session\n");
 		goto error;
 	}
 
+	rte_atomic16_set(&s->enable_intr, 0);
+	s->irqno = irqno;
+
 	s->ivshmem_socket = qtest_open_socket(ivshmem_path);
 	if (s->ivshmem_socket < 0) {
 		PMD_DRV_LOG(ERR, "Failed to open %s\n", ivshmem_path);
@@ -911,6 +1113,19 @@ qtest_vdev_init(char *qtest_path, char *ivshmem_path,
 	}
 	s->event_th_started = 1;
 
+	ret = pthread_create(&s->intr_th, NULL, qtest_intr_handler, s);
+	if (ret != 0) {
+		PMD_DRV_LOG(ERR, "Failed to create interrupt handler\n");
+		goto error;
+	}
+	s->intr_th_started = 1;
+
+	ret = qtest_intr_initialize(s);
+	if (ret != 0) {
+		PMD_DRV_LOG(ERR, "Failed to initialize interrupt\n");
+		goto error;
+	}
+
 	ret = qtest_setup_shared_memory(s);
 	if (ret != 0) {
 		PMD_DRV_LOG(ERR, "Failed to setup shared memory\n");
diff --git a/drivers/net/virtio/qtest_utils.h b/drivers/net/virtio/qtest_utils.h
index 26994b1..0717ee9 100644
--- a/drivers/net/virtio/qtest_utils.h
+++ b/drivers/net/virtio/qtest_utils.h
@@ -134,6 +134,8 @@ struct qtest_pci_device {
  *   Path of qtest socket.
  * @param ivshmem_path
  *   Path of ivshmem socket.
+ * @param irqno
+ *   Interrupt number of the target device(virtio-net device).
  * @param devices
  *   Array of device information. It should contain piix3, ivshmem and target
  *   device(virtio-net device).
@@ -143,7 +145,7 @@ struct qtest_pci_device {
  *   The pointer to qtest session structure.
  */
 struct qtest_session *qtest_vdev_init(char *qtest_path, char *ivshmem_path,
-		struct qtest_pci_device *devices, int devnum);
+		int irqno, struct qtest_pci_device *devices, int devnum);
 
 /**
  * @internal
@@ -156,6 +158,56 @@ void qtest_vdev_uninit(struct qtest_session *s);
 
 /**
  * @internal
+ * Register interrupt callback.
+ *
+ * @param s
+ *   The pointer to qtest session structure.
+ * @param cb
+ *   The pointer to callback.
+ * @param cb_arg
+ *   The pointer to callback argument.
+ */
+void qtest_intr_callback_register(struct qtest_session *s,
+		rte_intr_callback_fn cb, void *cb_arg);
+
+/**
+ * @internal
+ * Unregister interrupt callback.
+ *
+ * @param s
+ *   The pointer to qtest session structure.
+ * @param cb
+ *   The pointer to callback.
+ * @param cb_arg
+ *   The pointer to callback argument.
+ */
+void qtest_intr_callback_unregister(struct qtest_session *s,
+		rte_intr_callback_fn cb, void *cb_arg);
+
+/**
+ * @internal
+ * Enable interrupt callback.
+ *
+ * @param s
+ *   The pointer to qtest session structure.
+ * @return
+ *   0 on success, negative on error
+ */
+int qtest_intr_enable(struct qtest_session *s);
+
+/**
+ * @internal
+ * Disable interrupt callback.
+ *
+ * @param s
+ *   The pointer to qtest session structure.
+ * @return
+ *   0 on success, negative on error
+ */
+int qtest_intr_disable(struct qtest_session *s);
+
+/**
+ * @internal
  * Read a port of QEMU guest.
  *
  * @param s
@@ -218,6 +270,20 @@ void qtest_write(struct qtest_session *s, uint64_t addr,
 
 /**
  * @internal
+ * Initialization function of piix3 device.
+ *
+ * @param s
+ *   The pointer to qtest session structure.
+ * @param dev
+ *   The pointer of pci device.
+ * @return
+ *   0 on success, negative on error
+ */
+int qtest_init_piix3_device(struct qtest_session *s,
+			struct qtest_pci_device *dev);
+
+/**
+ * @internal
  * Initialization function of general device.
  *
  * @param s
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 120+ messages in thread

* [PATCH v4 09/12] virtio, qtest: Add misc functions to handle pci information
  2016-02-22  8:17       ` [PATCH v3 3/6] EAL: Add new EAL "--range-virtaddr" option Tetsuya Mukawa
                           ` (9 preceding siblings ...)
  2016-03-09  8:33         ` [PATCH v4 08/12] virtio, qtest: Add functionality to handle interrupt Tetsuya Mukawa
@ 2016-03-09  8:33         ` Tetsuya Mukawa
  2016-03-09  8:33         ` [PATCH v4 10/12] virtio: Add QTest support to vtpci abstraction Tetsuya Mukawa
                           ` (2 subsequent siblings)
  13 siblings, 0 replies; 120+ messages in thread
From: Tetsuya Mukawa @ 2016-03-09  8:33 UTC (permalink / raw)
  To: dev, jianfeng.tan, huawei.xie, yuanhan.liu

The patch adds below functions.
 - qtest_read_pci_cfg
 - qtest_get_bar
 - qtest_get_bar_addr
 - qtest_get_bar_size
These are used for handling pci device information.
It will be called by later patches.

Signed-off-by: Tetsuya Mukawa <mukawa@igel.co.jp>
---
 drivers/net/virtio/qtest_utils.c | 77 ++++++++++++++++++++++++++++++++++++++++
 drivers/net/virtio/qtest_utils.h | 56 +++++++++++++++++++++++++++++
 2 files changed, 133 insertions(+)

diff --git a/drivers/net/virtio/qtest_utils.c b/drivers/net/virtio/qtest_utils.c
index 337546a..55ed504 100644
--- a/drivers/net/virtio/qtest_utils.c
+++ b/drivers/net/virtio/qtest_utils.c
@@ -427,6 +427,83 @@ qtest_find_device(struct qtest_session *s, const char *name)
 	return NULL;
 }
 
+/*
+ * The function is used for reading pci configuration space of specifed device.
+ */
+int
+qtest_read_pci_cfg(struct qtest_session *s, const char *name,
+		void *buf, size_t len, off_t offset)
+{
+	struct qtest_pci_device *dev;
+	uint32_t i;
+	uint8_t *p = buf;
+
+	dev = qtest_find_device(s, name);
+	if (dev == NULL) {
+		PMD_DRV_LOG(ERR, "Cannot find specified device: %s\n", name);
+		return -1;
+	}
+
+	for (i = 0; i < len; i++) {
+		*(p + i) = qtest_pci_inb(s,
+				dev->bus_addr, dev->device_addr, 0, offset + i);
+	}
+
+	return 0;
+}
+
+static struct qtest_pci_bar *
+qtest_get_bar(struct qtest_session *s, const char *name, uint8_t bar)
+{
+	struct qtest_pci_device *dev;
+
+	if (bar >= NB_BAR) {
+		PMD_DRV_LOG(ERR, "Invalid bar is specified: %u\n", bar);
+		return NULL;
+	}
+
+	dev = qtest_find_device(s, name);
+	if (dev == NULL) {
+		PMD_DRV_LOG(ERR, "Cannot find specified device: %s\n", name);
+		return NULL;
+	}
+
+	if (dev->bar[bar].type == QTEST_PCI_BAR_DISABLE) {
+		PMD_DRV_LOG(ERR, "Cannot find valid BAR(%s): %u\n", name, bar);
+		return NULL;
+	}
+
+	return &dev->bar[bar];
+}
+
+int
+qtest_get_bar_addr(struct qtest_session *s, const char *name,
+		uint8_t bar, uint64_t **addr)
+{
+	struct qtest_pci_bar *bar_ptr;
+
+	bar_ptr = qtest_get_bar(s, name, bar);
+	if (bar_ptr == NULL)
+		return -1;
+
+	*addr = (uint64_t *)bar_ptr->region_start;
+	return 0;
+}
+
+int
+qtest_get_bar_size(struct qtest_session *s, const char *name,
+		uint8_t bar, uint64_t *size)
+{
+	struct qtest_pci_bar *bar_ptr;
+
+	bar_ptr = qtest_get_bar(s, name, bar);
+	if (bar_ptr == NULL)
+		return -1;
+
+	*size = bar_ptr->region_size;
+	return 0;
+}
+
 int
 qtest_intr_enable(struct qtest_session *s)
 {
diff --git a/drivers/net/virtio/qtest_utils.h b/drivers/net/virtio/qtest_utils.h
index 0717ee9..dfd2b03 100644
--- a/drivers/net/virtio/qtest_utils.h
+++ b/drivers/net/virtio/qtest_utils.h
@@ -270,6 +270,62 @@ void qtest_write(struct qtest_session *s, uint64_t addr,
 
 /**
  * @internal
+ * Read pci configuration space of QEMU guest.
+ *
+ * @param s
+ *   The pointer to qtest session structure.
+ * @param name
+ *   The name of pci device.
+ * @param buf
+ *   The pointer to the buffer.
+ * @param len
+ *   Length to read.
+ * @param offset
+ *   Offset of pci configuration space.
+ * @return
+ *   0 on success, negative on error
+ */
+int qtest_read_pci_cfg(struct qtest_session *s, const char *name,
+			void *buf, size_t len, off_t offset);
+
+/**
+ * @internal
+ * Get BAR address of a specified pci device.
+ *
+ * @param s
+ *   The pointer to qtest session structure.
+ * @param name
+ *   The name of pci device.
+ * @param bar
+ *   The index of BAR. Should be between 0 to 5.
+ * @param addr
+ *   The pointer to store BAR address.
+ * @return
+ *   0 on success, negative on error
+ */
+int qtest_get_bar_addr(struct qtest_session *s, const char *name,
+			uint8_t bar, uint64_t **addr);
+
+/**
+ * @internal
+ * Get BAR size of a specified pci device.
+ *
+ * @param s
+ *   The pointer to qtest session structure.
+ * @param name
+ *   The name of pci device.
+ * @param bar
+ *   The index of BAR. Should be between 0 to 5.
+ * @param size
+ *   The pointer to store BAR size.
+ * @return
+ *   0 on success, negative on error
+ */
+int qtest_get_bar_size(struct qtest_session *s, const char *name,
+			uint8_t bar, uint64_t *size);
+
+/**
+ * @internal
  * Initialization function of piix3 device.
  *
  * @param s
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 120+ messages in thread

* [PATCH v4 10/12] virtio: Add QTest support to vtpci abstraction
  2016-02-22  8:17       ` [PATCH v3 3/6] EAL: Add new EAL "--range-virtaddr" option Tetsuya Mukawa
                           ` (10 preceding siblings ...)
  2016-03-09  8:33         ` [PATCH v4 09/12] virtio, qtest: Add misc functions to handle pci information Tetsuya Mukawa
@ 2016-03-09  8:33         ` Tetsuya Mukawa
  2016-03-09  8:33         ` [PATCH v4 11/12] virtio: Add QTest support for virtio-net PMD Tetsuya Mukawa
  2016-03-09  8:33         ` [PATCH v4 12/12] docs: add release note for qtest virtio container support Tetsuya Mukawa
  13 siblings, 0 replies; 120+ messages in thread
From: Tetsuya Mukawa @ 2016-03-09  8:33 UTC (permalink / raw)
  To: dev, jianfeng.tan, huawei.xie, yuanhan.liu

The patch adds QTest support to vtpci abstraction.
With this patch, only modern virtio device will be supported.
This QTest support will be used by later QTest extension patch of
virtio-net PMD.

Signed-off-by: Tetsuya Mukawa <mukawa@igel.co.jp>
---
 drivers/net/virtio/qtest.h         |  39 ++++
 drivers/net/virtio/virtio_ethdev.c |   2 +-
 drivers/net/virtio/virtio_pci.c    | 368 ++++++++++++++++++++++++++++++++++---
 drivers/net/virtio/virtio_pci.h    |   9 +-
 4 files changed, 387 insertions(+), 31 deletions(-)
 create mode 100644 drivers/net/virtio/qtest.h

diff --git a/drivers/net/virtio/qtest.h b/drivers/net/virtio/qtest.h
new file mode 100644
index 0000000..46b9ee6
--- /dev/null
+++ b/drivers/net/virtio/qtest.h
@@ -0,0 +1,39 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2016 IGEL Co., Ltd. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of IGEL Co., Ltd. nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _VIRTIO_QTEST_H_
+#define _VIRTIO_QTEST_H_
+
+#define QTEST_DRV_NAME		        "eth_qtest_virtio"
+
+#endif /* _VIRTIO_QTEST_H_ */
diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c
index bc631c7..747596d 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -1055,7 +1055,7 @@ eth_virtio_dev_init(struct rte_eth_dev *eth_dev)
 	pci_dev = eth_dev->pci_dev;
 
 	if (virtio_dev_check(eth_dev, RTE_ETH_DEV_PCI, NULL, 0)) {
-		if (vtpci_init(pci_dev, hw) < 0)
+		if (vtpci_init(eth_dev, hw) < 0)
 			return -1;
 	}
 
diff --git a/drivers/net/virtio/virtio_pci.c b/drivers/net/virtio/virtio_pci.c
index 85fbe88..e88531e 100644
--- a/drivers/net/virtio/virtio_pci.c
+++ b/drivers/net/virtio/virtio_pci.c
@@ -37,10 +37,16 @@
  #include <fcntl.h>
 #endif
 
+#include "virtio_ethdev.h"
 #include "virtio_pci.h"
 #include "virtio_logs.h"
 #include "virtqueue.h"
 
+#ifdef RTE_VIRTIO_VDEV_QTEST
+#include "qtest.h"
+#include "qtest_utils.h"
+#endif
+
 /*
  * Following macros are derived from linux/pci_regs.h, however,
  * we can't simply include that header here, as there is no such
@@ -440,6 +446,220 @@ static const struct virtio_pci_ops modern_ops = {
 };
 
 
+#ifdef RTE_VIRTIO_VDEV_QTEST
+static inline uint8_t
+qtest_read8(struct virtio_hw *hw, uint8_t *addr)
+{
+	return qtest_read(hw->qsession, (uint64_t)addr, 'b');
+}
+
+static inline void
+qtest_write8(struct virtio_hw *hw, uint8_t val, uint8_t *addr)
+{
+	return qtest_write(hw->qsession, (uint64_t)addr, val, 'b');
+}
+
+static inline uint16_t
+qtest_read16(struct virtio_hw *hw, uint16_t *addr)
+{
+	return qtest_read(hw->qsession, (uint64_t)addr, 'w');
+}
+
+static inline void
+qtest_write16(struct virtio_hw *hw, uint16_t val, uint16_t *addr)
+{
+	return qtest_write(hw->qsession, (uint64_t)addr, val, 'w');
+}
+
+static inline uint32_t
+qtest_read32(struct virtio_hw *hw, uint32_t *addr)
+{
+	return qtest_read(hw->qsession, (uint64_t)addr, 'l');
+}
+
+static inline void
+qtest_write32(struct virtio_hw *hw, uint32_t val, uint32_t *addr)
+{
+	return qtest_write(hw->qsession, (uint64_t)addr, val, 'l');
+}
+
+static inline void
+qtest_write64_twopart(struct virtio_hw *hw,
+		uint64_t val, uint32_t *lo, uint32_t *hi)
+{
+	qtest_write32(hw, val & ((1ULL << 32) - 1), lo);
+	qtest_write32(hw, val >> 32,		     hi);
+}
+
+static void
+qtest_modern_read_dev_config(struct virtio_hw *hw, size_t offset,
+		       void *dst, int length)
+{
+	int i;
+	uint8_t *p;
+	uint8_t old_gen, new_gen;
+
+	do {
+		old_gen = qtest_read8(hw, &hw->common_cfg->config_generation);
+
+		p = dst;
+		for (i = 0;  i < length; i++)
+			*p++ = qtest_read8(hw, (uint8_t *)hw->dev_cfg + offset + i);
+
+		new_gen = qtest_read8(hw, &hw->common_cfg->config_generation);
+	} while (old_gen != new_gen);
+}
+
+static void
+qtest_modern_write_dev_config(struct virtio_hw *hw, size_t offset,
+			const void *src, int length)
+{
+	int i;
+	const uint8_t *p = src;
+
+	for (i = 0;  i < length; i++)
+		qtest_write8(hw, *p++, (uint8_t *)hw->dev_cfg + offset + i);
+}
+
+static uint64_t
+qtest_modern_get_features(struct virtio_hw *hw)
+{
+	uint32_t features_lo, features_hi;
+
+	qtest_write32(hw, 0, &hw->common_cfg->device_feature_select);
+	features_lo = qtest_read32(hw, &hw->common_cfg->device_feature);
+
+	qtest_write32(hw, 1, &hw->common_cfg->device_feature_select);
+	features_hi = qtest_read32(hw, &hw->common_cfg->device_feature);
+
+	return ((uint64_t)features_hi << 32) | features_lo;
+}
+
+static void
+qtest_modern_set_features(struct virtio_hw *hw, uint64_t features)
+{
+	qtest_write32(hw, 0, &hw->common_cfg->guest_feature_select);
+	qtest_write32(hw, features & ((1ULL << 32) - 1),
+		&hw->common_cfg->guest_feature);
+
+	qtest_write32(hw, 1, &hw->common_cfg->guest_feature_select);
+	qtest_write32(hw, features >> 32,
+		&hw->common_cfg->guest_feature);
+}
+
+static uint8_t
+qtest_modern_get_status(struct virtio_hw *hw)
+{
+	return qtest_read8(hw, &hw->common_cfg->device_status);
+}
+
+static void
+qtest_modern_set_status(struct virtio_hw *hw, uint8_t status)
+{
+	qtest_write8(hw, status, &hw->common_cfg->device_status);
+}
+
+static void
+qtest_modern_reset(struct virtio_hw *hw)
+{
+	modern_set_status(hw, VIRTIO_CONFIG_STATUS_RESET);
+	modern_get_status(hw);
+}
+
+static uint8_t
+qtest_modern_get_isr(struct virtio_hw *hw)
+{
+	return qtest_read8(hw, hw->isr);
+}
+
+static uint16_t
+qtest_modern_set_config_irq(struct virtio_hw *hw, uint16_t vec)
+{
+	qtest_write16(hw, vec, &hw->common_cfg->msix_config);
+	return qtest_read16(hw, &hw->common_cfg->msix_config);
+}
+
+static uint16_t
+qtest_modern_get_queue_num(struct virtio_hw *hw, uint16_t queue_id)
+{
+	qtest_write16(hw, queue_id, &hw->common_cfg->queue_select);
+	return qtest_read16(hw, &hw->common_cfg->queue_size);
+}
+
+static void
+qtest_modern_setup_queue(struct virtio_hw *hw, struct virtqueue *vq)
+{
+	uint64_t desc_addr, avail_addr, used_addr;
+	uint16_t notify_off;
+
+	desc_addr = (uint64_t)vq->mz->addr;
+	avail_addr = desc_addr + vq->vq_nentries * sizeof(struct vring_desc);
+	used_addr = RTE_ALIGN_CEIL(avail_addr + offsetof(struct vring_avail,
+							 ring[vq->vq_nentries]),
+				   VIRTIO_PCI_VRING_ALIGN);
+
+	qtest_write16(hw, vq->vq_queue_index, &hw->common_cfg->queue_select);
+
+	qtest_write64_twopart(hw, desc_addr, &hw->common_cfg->queue_desc_lo,
+				      &hw->common_cfg->queue_desc_hi);
+	qtest_write64_twopart(hw, avail_addr, &hw->common_cfg->queue_avail_lo,
+				       &hw->common_cfg->queue_avail_hi);
+	qtest_write64_twopart(hw, used_addr, &hw->common_cfg->queue_used_lo,
+				      &hw->common_cfg->queue_used_hi);
+
+	notify_off = qtest_read16(hw, &hw->common_cfg->queue_notify_off);
+	vq->notify_addr = (void *)((uint8_t *)hw->notify_base +
+				notify_off * hw->notify_off_multiplier);
+
+	qtest_write16(hw, 1, &hw->common_cfg->queue_enable);
+
+	PMD_INIT_LOG(DEBUG, "queue %u addresses:", vq->vq_queue_index);
+	PMD_INIT_LOG(DEBUG, "\t desc_addr: %" PRIx64, desc_addr);
+	PMD_INIT_LOG(DEBUG, "\t aval_addr: %" PRIx64, avail_addr);
+	PMD_INIT_LOG(DEBUG, "\t used_addr: %" PRIx64, used_addr);
+	PMD_INIT_LOG(DEBUG, "\t notify addr: %p (notify offset: %u)",
+		vq->notify_addr, notify_off);
+}
+
+static void
+qtest_modern_del_queue(struct virtio_hw *hw, struct virtqueue *vq)
+{
+	qtest_write16(hw, vq->vq_queue_index, &hw->common_cfg->queue_select);
+
+	qtest_write64_twopart(hw, 0, &hw->common_cfg->queue_desc_lo,
+				  &hw->common_cfg->queue_desc_hi);
+	qtest_write64_twopart(hw, 0, &hw->common_cfg->queue_avail_lo,
+				  &hw->common_cfg->queue_avail_hi);
+	qtest_write64_twopart(hw, 0, &hw->common_cfg->queue_used_lo,
+				  &hw->common_cfg->queue_used_hi);
+
+	qtest_write16(hw, 0, &hw->common_cfg->queue_enable);
+}
+
+static void
+qtest_modern_notify_queue(struct virtio_hw *hw __rte_unused, struct virtqueue *vq)
+{
+	qtest_write16(hw, 1, vq->notify_addr);
+}
+
+static const struct virtio_pci_ops qtest_modern_ops = {
+	.read_dev_cfg	= qtest_modern_read_dev_config,
+	.write_dev_cfg	= qtest_modern_write_dev_config,
+	.reset		= qtest_modern_reset,
+	.get_status	= qtest_modern_get_status,
+	.set_status	= qtest_modern_set_status,
+	.get_features	= qtest_modern_get_features,
+	.set_features	= qtest_modern_set_features,
+	.get_isr	= qtest_modern_get_isr,
+	.set_config_irq	= qtest_modern_set_config_irq,
+	.get_queue_num	= qtest_modern_get_queue_num,
+	.setup_queue	= qtest_modern_setup_queue,
+	.del_queue	= qtest_modern_del_queue,
+	.notify_queue	= qtest_modern_notify_queue,
+};
+#endif /* RTE_VIRTIO_VDEV_QTEST */
+
+
 void
 vtpci_read_dev_config(struct virtio_hw *hw, size_t offset,
 		      void *dst, int length)
@@ -513,12 +733,16 @@ vtpci_irq_config(struct virtio_hw *hw, uint16_t vec)
 }
 
 static void *
-get_cfg_addr(struct rte_pci_device *dev, struct virtio_pci_cap *cap)
+get_cfg_addr(struct rte_eth_dev *eth_dev,
+		struct virtio_hw *hw,
+		struct virtio_pci_cap *cap)
 {
+	struct rte_pci_device *pci_dev = eth_dev->pci_dev;
 	uint8_t  bar    = cap->bar;
 	uint32_t length = cap->length;
 	uint32_t offset = cap->offset;
-	uint8_t *base;
+	uint8_t *base = NULL;
+	uint64_t size = 0;
 
 	if (bar > 5) {
 		PMD_INIT_LOG(ERR, "invalid bar: %u", bar);
@@ -531,14 +755,29 @@ get_cfg_addr(struct rte_pci_device *dev, struct virtio_pci_cap *cap)
 		return NULL;
 	}
 
-	if (offset + length > dev->mem_resource[bar].len) {
+	if (virtio_dev_check(eth_dev, RTE_ETH_DEV_PCI, NULL, 0)) {
+		size = pci_dev->mem_resource[bar].len;
+		base = pci_dev->mem_resource[bar].addr;
+	}
+#ifdef RTE_VIRTIO_VDEV_QTEST
+	else if (virtio_dev_check(eth_dev, RTE_ETH_DEV_VIRTUAL,
+				QTEST_DRV_NAME, 0)) {
+		qtest_get_bar_size(hw->qsession,
+				"virtio-net", bar, &size);
+		qtest_get_bar_addr(hw->qsession,
+				"virtio-net", bar, (uint64_t **)&base);
+	}
+#else
+	RTE_SET_USED(hw);
+#endif
+
+	if (offset + length > size) {
 		PMD_INIT_LOG(ERR,
 			"invalid cap: overflows bar space: %u > %" PRIu64,
-			offset + length, dev->mem_resource[bar].len);
+			offset + length, size);
 		return NULL;
 	}
 
-	base = dev->mem_resource[bar].addr;
 	if (base == NULL) {
 		PMD_INIT_LOG(ERR, "bar %u base addr is NULL", bar);
 		return NULL;
@@ -548,25 +787,49 @@ get_cfg_addr(struct rte_pci_device *dev, struct virtio_pci_cap *cap)
 }
 
 static int
-virtio_read_caps(struct rte_pci_device *dev, struct virtio_hw *hw)
+virtio_read_pci_config(struct rte_eth_dev *eth_dev,
+			struct virtio_hw *hw,
+			void *buf, size_t len, off_t offset)
 {
+	struct rte_pci_device *pci_dev = eth_dev->pci_dev;
+	int ret = -1;
+
+	if (virtio_dev_check(eth_dev, RTE_ETH_DEV_PCI, NULL, 0))
+		ret = rte_eal_pci_read_config(pci_dev, buf, len, offset);
+#ifdef RTE_VIRTIO_VDEV_QTEST
+	else if (virtio_dev_check(eth_dev, RTE_ETH_DEV_VIRTUAL,
+				QTEST_DRV_NAME, 0))
+		ret = qtest_read_pci_cfg(hw->qsession,
+				"virtio-net", buf, len, offset);
+#else
+	RTE_SET_USED(hw);
+#endif
+
+	return ret;
+}
+
+static int
+virtio_read_caps(struct rte_eth_dev *eth_dev, struct virtio_hw *hw)
+{
+	struct rte_pci_device *pci_dev = eth_dev->pci_dev;
 	uint8_t pos;
 	struct virtio_pci_cap cap;
 	int ret;
 
-	if (rte_eal_pci_map_device(dev)) {
+	if ((eth_dev->dev_type == RTE_ETH_DEV_PCI) &&
+			(rte_eal_pci_map_device(pci_dev) < 0)) {
 		PMD_INIT_LOG(DEBUG, "failed to map pci device!");
 		return -1;
 	}
 
-	ret = rte_eal_pci_read_config(dev, &pos, 1, PCI_CAPABILITY_LIST);
+	ret = virtio_read_pci_config(eth_dev, hw, &pos, 1, PCI_CAPABILITY_LIST);
 	if (ret < 0) {
 		PMD_INIT_LOG(DEBUG, "failed to read pci capability list");
 		return -1;
 	}
 
 	while (pos) {
-		ret = rte_eal_pci_read_config(dev, &cap, sizeof(cap), pos);
+		ret = virtio_read_pci_config(eth_dev, hw, &cap, sizeof(cap), pos);
 		if (ret < 0) {
 			PMD_INIT_LOG(ERR,
 				"failed to read pci cap at pos: %x", pos);
@@ -586,18 +849,19 @@ virtio_read_caps(struct rte_pci_device *dev, struct virtio_hw *hw)
 
 		switch (cap.cfg_type) {
 		case VIRTIO_PCI_CAP_COMMON_CFG:
-			hw->common_cfg = get_cfg_addr(dev, &cap);
+			hw->common_cfg = get_cfg_addr(eth_dev, hw, &cap);
 			break;
 		case VIRTIO_PCI_CAP_NOTIFY_CFG:
-			rte_eal_pci_read_config(dev, &hw->notify_off_multiplier,
+			virtio_read_pci_config(eth_dev, hw,
+						&hw->notify_off_multiplier,
 						4, pos + sizeof(cap));
-			hw->notify_base = get_cfg_addr(dev, &cap);
+			hw->notify_base = get_cfg_addr(eth_dev, hw, &cap);
 			break;
 		case VIRTIO_PCI_CAP_DEVICE_CFG:
-			hw->dev_cfg = get_cfg_addr(dev, &cap);
+			hw->dev_cfg = get_cfg_addr(eth_dev, hw, &cap);
 			break;
 		case VIRTIO_PCI_CAP_ISR_CFG:
-			hw->isr = get_cfg_addr(dev, &cap);
+			hw->isr = get_cfg_addr(eth_dev, hw, &cap);
 			break;
 		}
 
@@ -622,31 +886,77 @@ next:
 	return 0;
 }
 
+static int
+vtpci_modern_init(struct rte_eth_dev *eth_dev, struct virtio_hw *hw)
+{
+	struct rte_pci_device *pci_dev = eth_dev->pci_dev;
+
+	PMD_INIT_LOG(INFO, "modern virtio pci detected.");
+
+	if (virtio_dev_check(eth_dev, RTE_ETH_DEV_PCI, NULL, 0)) {
+		hw->vtpci_ops = &modern_ops;
+		pci_dev->driver->drv_flags |= RTE_PCI_DRV_INTR_LSC;
+	}
+#ifdef RTE_VIRTIO_VDEV_QTEST
+	else if (virtio_dev_check(eth_dev, RTE_ETH_DEV_VIRTUAL, NULL, 0)) {
+		hw->vtpci_ops = &qtest_modern_ops;
+		eth_dev->data->dev_flags |= RTE_ETH_DEV_INTR_LSC;
+	}
+#endif
+
+	hw->modern = 1;
+
+	return 0;
+}
+
+static int
+vtpci_legacy_init(struct rte_eth_dev *eth_dev, struct virtio_hw *hw)
+{
+	struct rte_pci_device *pci_dev = eth_dev->pci_dev;
+	struct virtio_pci_cap cap;
+
+	PMD_INIT_LOG(INFO, "trying with legacy virtio pci.");
+	if (virtio_dev_check(eth_dev, RTE_ETH_DEV_PCI, NULL, 0)) {
+		if (legacy_virtio_resource_init(pci_dev, hw) < 0)
+			return -1;
+
+		hw->vtpci_ops = &legacy_ops;
+		hw->use_msix = legacy_virtio_has_msix(&pci_dev->addr);
+	}
+#ifdef RTE_VIRTIO_VDEV_QTEST
+	else if (virtio_dev_check(eth_dev, RTE_ETH_DEV_VIRTUAL,
+				QTEST_DRV_NAME, 0)) {
+		PMD_INIT_LOG(ERR, "Legacy virtio device isn't supported.");
+		return -1;
+	}
+#endif
+
+	cap.bar = cap.length = cap.offset = 0;
+	hw->modern = 0;
+
+	return 0;
+}
+
 int
-vtpci_init(struct rte_pci_device *dev, struct virtio_hw *hw)
+vtpci_init(struct rte_eth_dev *eth_dev, struct virtio_hw *hw)
 {
-	hw->dev = dev;
+	struct rte_pci_device *pci_dev = eth_dev->pci_dev;
+	int ret;
+
+	hw->dev = pci_dev;
 
 	/*
 	 * Try if we can succeed reading virtio pci caps, which exists
 	 * only on modern pci device. If failed, we fallback to legacy
 	 * virtio handling.
 	 */
-	if (virtio_read_caps(dev, hw) == 0) {
-		PMD_INIT_LOG(INFO, "modern virtio pci detected.");
-		hw->vtpci_ops = &modern_ops;
-		hw->modern    = 1;
-		dev->driver->drv_flags |= RTE_PCI_DRV_INTR_LSC;
-		return 0;
-	}
+	if (virtio_read_caps(eth_dev, hw) == 0)
+		ret = vtpci_modern_init(eth_dev, hw);
+	else
+		ret = vtpci_legacy_init(eth_dev, hw);
 
-	PMD_INIT_LOG(INFO, "trying with legacy virtio pci.");
-	if (legacy_virtio_resource_init(dev, hw) < 0)
+	if (ret < 0)
 		return -1;
 
-	hw->vtpci_ops = &legacy_ops;
-	hw->use_msix = legacy_virtio_has_msix(&dev->addr);
-	hw->modern   = 0;
-
 	return 0;
 }
diff --git a/drivers/net/virtio/virtio_pci.h b/drivers/net/virtio/virtio_pci.h
index ae6777d..9eb210c 100644
--- a/drivers/net/virtio/virtio_pci.h
+++ b/drivers/net/virtio/virtio_pci.h
@@ -39,6 +39,10 @@
 #include <rte_pci.h>
 #include <rte_ethdev.h>
 
+#ifdef RTE_VIRTIO_VDEV_QTEST
+#include "qtest_utils.h"
+#endif /* RTE_VIRTIO_VDEV_QTEST */
+
 struct virtqueue;
 
 /* VirtIO PCI vendor/device ID. */
@@ -242,6 +246,9 @@ struct virtio_net_config;
 
 struct virtio_hw {
 	struct virtqueue *cvq;
+#ifdef RTE_VIRTIO_VDEV_QTEST
+	struct qtest_session *qsession;
+#endif
 	struct rte_pci_ioport io;
 	uint64_t    guest_features;
 	uint32_t    max_tx_queues;
@@ -306,7 +313,7 @@ vtpci_with_feature(struct virtio_hw *hw, uint64_t bit)
 /*
  * Function declaration from virtio_pci.c
  */
-int vtpci_init(struct rte_pci_device *, struct virtio_hw *);
+int vtpci_init(struct rte_eth_dev *, struct virtio_hw *);
 void vtpci_reset(struct virtio_hw *);
 
 void vtpci_reinit_complete(struct virtio_hw *);
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 120+ messages in thread

* [PATCH v4 11/12] virtio: Add QTest support for virtio-net PMD
  2016-02-22  8:17       ` [PATCH v3 3/6] EAL: Add new EAL "--range-virtaddr" option Tetsuya Mukawa
                           ` (11 preceding siblings ...)
  2016-03-09  8:33         ` [PATCH v4 10/12] virtio: Add QTest support to vtpci abstraction Tetsuya Mukawa
@ 2016-03-09  8:33         ` Tetsuya Mukawa
  2016-06-02  3:29           ` [PATCH v5 0/6] Virtio-net PMD: QEMU QTest extension for container Tetsuya Mukawa
                             ` (9 more replies)
  2016-03-09  8:33         ` [PATCH v4 12/12] docs: add release note for qtest virtio container support Tetsuya Mukawa
  13 siblings, 10 replies; 120+ messages in thread
From: Tetsuya Mukawa @ 2016-03-09  8:33 UTC (permalink / raw)
  To: dev, jianfeng.tan, huawei.xie, yuanhan.liu

The patch adds a new virtio-net PMD configuration that allows the PMD to
work on host as if the PMD is in VM.
Here is new configuration for virtio-net PMD.
 - CONFIG_RTE_VIRTIO_VDEV_QTEST
To use this mode, EAL needs map all hugepages as one file. Also the file
should be mapped between (1 << 31) and (1 << 44). And start address
should be aligned by EAL memory size.

To allocate like above, use below options.
 --single-file
 --range-virtaddr=0x80000000-0x100000000000
 --align-memsize
If a free region cannot be found, EAL will return error.

To prepare virtio-net device on host, the users need to invoke QEMU
process in special QTest mode. This mode is mainly used for testing QEMU
devices from outer process. In this mode, no guest runs.
Here is QEMU command line.

 $ qemu-system-x86_64 \
     -machine pc-i440fx-1.4,accel=qtest \
     -display none -qtest-log /dev/null \
     -qtest unix:/tmp/socket,server \
     -netdev type=tap,script=/etc/qemu-ifup,id=net0,queues=1 \
     -device
virtio-net-pci,netdev=net0,mq=on,disable-modern=false,addr=3 \
     -chardev socket,id=chr1,path=/tmp/ivshmem,server \
     -device ivshmem,size=1G,chardev=chr1,vectors=1,addr=4

 * Should use QEMU-2.5.1, or above.
 * QEMU process is needed per port.
 * virtio-1.0 device are only supported.
 * The vhost backends like vhost-net and vhost-user can be specified.
 * In most cases, just using above command is enough, but you can also
   specify other QEMU virtio-net options like mac address.
 * Only checked "pc-i440fx-1.4" machine, but may work with other
   machines.
 * Should not add "--enable-kvm" to QEMU command line.

After invoking QEMU, the PMD can connect to QEMU process using unix
domain sockets. Over these sockets, virtio-net, ivshmem and piix3
device in QEMU are probed by the PMD.
Here is example of command line.

 $ testpmd -c f -n 1 -m 1024 --no-pci --single-file \
      --range-virtaddr=0x80000000-0x100000000000 --align-memsize \
      --vdev="eth_qtest_virtio0,qtest=/tmp/socket,ivshmem=/tmp/ivshmem"\
      -- --disable-hw-vlan --txqflags=0xf00 -i

Please specify same unix domain sockets and memory size in both QEMU
and DPDK command lines like above.
The share memory size should be power of 2, because ivshmem only
accepts such memory size.

Signed-off-by: Tetsuya Mukawa <mukawa@igel.co.jp>
---
 drivers/net/virtio/qtest.h         |  55 +++++
 drivers/net/virtio/virtio_ethdev.c | 457 ++++++++++++++++++++++++++++++++++++-
 2 files changed, 501 insertions(+), 11 deletions(-)

diff --git a/drivers/net/virtio/qtest.h b/drivers/net/virtio/qtest.h
index 46b9ee6..421e62c 100644
--- a/drivers/net/virtio/qtest.h
+++ b/drivers/net/virtio/qtest.h
@@ -35,5 +35,60 @@
 #define _VIRTIO_QTEST_H_
 
 #define QTEST_DRV_NAME		        "eth_qtest_virtio"
+#define QTEST_DEVICE_NUM                3
+
+#include <linux/pci_regs.h>
+
+/* Device information */
+#define VIRTIO_NET_DEVICE_ID            0x1000
+#define VIRTIO_NET_VENDOR_ID            0x1af4
+#define VIRTIO_NET_IRQ_NUM              10
+#define IVSHMEM_DEVICE_ID               0x1110
+#define IVSHMEM_VENDOR_ID               0x1af4
+#define PIIX3_DEVICE_ID                 0x7000
+#define PIIX3_VENDOR_ID                 0x8086
+
+/* ------------------------------------------------------------
+ * IO port mapping of qtest guest
+ * ------------------------------------------------------------
+ * 0x0000 - 0xbfff : not used
+ * 0xc000 - 0xc03f : virtio-net(BAR0)
+ * 0xc040 - 0xffff : not used
+ *
+ * ------------------------------------------------------------
+ * Memory mapping of qtest quest
+ * ------------------------------------------------------------
+ * 0x00000000_00000000 - 0x00000000_3fffffff : not used
+ * 0x00000000_40000000 - 0x00000000_40000fff : virtio-net(BAR1)
+ * 0x00000000_40001000 - 0x00000000_40ffffff : not used
+ * 0x00000000_41000000 - 0x00000000_417fffff : virtio-net(BAR4)
+ * 0x00000000_41800000 - 0x00000000_41ffffff : not used
+ * 0x00000000_42000000 - 0x00000000_420000ff : ivshmem(BAR0)
+ * 0x00000000_42000100 - 0x00000000_42ffffff : not used
+ * 0x00000000_80000000 - 0xffffffff_ffffffff : ivshmem(BAR2)
+ *
+ * We can only specify start address of a region. The region size
+ * will be defined by the device implementation in QEMU.
+ * The size will be pow of 2 according to the PCI specification.
+ * Also, the region start address should be aligned by region size.
+ *
+ * BAR2 of ivshmem will be used to mmap DPDK application memory.
+ * So this address will be dynamically changed, but not to overlap
+ * others, it should be mmaped between above addresses. Such allocation
+ * is done by EAL. Check rte_eal_get_free_region() also.
+ */
+#define VIRTIO_NET_IO_START             0xc000
+#define VIRTIO_NET_MEMORY1_START	0x40000000
+#define VIRTIO_NET_MEMORY2_START	0x41000000
+#define IVSHMEM_MEMORY_START            0x42000000
+
+static inline struct rte_pci_id
+qtest_get_pci_id_of_virtio_net(void)
+{
+	struct rte_pci_id id =  {VIRTIO_NET_DEVICE_ID,
+		VIRTIO_NET_VENDOR_ID, PCI_ANY_ID, PCI_ANY_ID};
+
+	return id;
+}
 
 #endif /* _VIRTIO_QTEST_H_ */
diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c
index 747596d..4e454db 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -60,6 +60,10 @@
 #include "virtqueue.h"
 #include "virtio_rxtx.h"
 
+#ifdef RTE_VIRTIO_VDEV_QTEST
+#include "qtest.h"
+#include "qtest_utils.h"
+#endif
 
 static int eth_virtio_dev_init(struct rte_eth_dev *eth_dev);
 static int eth_virtio_dev_uninit(struct rte_eth_dev *eth_dev);
@@ -387,7 +391,7 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev,
 			return -ENOMEM;
 		}
 	}
-#ifdef RTE_VIRTIO_VDEV
+#if defined(RTE_VIRTIO_VDEV) || defined(RTE_VIRTIO_VDEV_QTEST)
 	else
 		vq->vq_ring_mem = (phys_addr_t)mz->addr; /* Use vaddr!!! */
 #endif
@@ -431,7 +435,7 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev,
 
 		if (virtio_dev_check(dev, RTE_ETH_DEV_PCI, NULL, 0))
 			vq->virtio_net_hdr_mem = mz->phys_addr;
-#ifdef RTE_VIRTIO_VDEV
+#if defined(RTE_VIRTIO_VDEV) || defined(RTE_VIRTIO_VDEV_QTEST)
 		else
 			vq->virtio_net_hdr_mem = (phys_addr_t)mz->addr;
 #endif
@@ -441,7 +445,7 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev,
 
 	if (virtio_dev_check(dev, RTE_ETH_DEV_PCI, NULL, 0))
 		vq->offset = offsetof(struct rte_mbuf, buf_physaddr);
-#ifdef RTE_VIRTIO_VDEV
+#if defined(RTE_VIRTIO_VDEV) || defined(RTE_VIRTIO_VDEV_QTEST)
 	else
 		vq->offset = offsetof(struct rte_mbuf, buf_addr);
 #endif
@@ -999,6 +1003,23 @@ virtio_interrupt_handler(__rte_unused struct rte_intr_handle *handle,
 	isr = vtpci_isr(hw);
 	PMD_DRV_LOG(INFO, "interrupt status = %#x", isr);
 
+#ifdef RTE_VIRTIO_VDEV_QTEST
+	if (virtio_dev_check(dev, RTE_ETH_DEV_VIRTUAL, QTEST_DRV_NAME, 0)) {
+		if (qtest_intr_enable(hw->qsession) < 0)
+			PMD_DRV_LOG(ERR, "interrupt enable failed");
+		/*
+		 * If last qtest message is interrupt, 'isr' will be 0
+		 * becasue socket has been closed already.
+		 * But still we want to notice this event to EAL.
+		 * So just ignore isr value.
+		 */
+		if (virtio_dev_link_update(dev, 0) == 0)
+			_rte_eth_dev_callback_process(dev,
+					RTE_ETH_EVENT_INTR_LSC);
+		return;
+	}
+#endif
+
 	if (virtio_dev_check(dev, RTE_ETH_DEV_PCI, NULL, 0))
 		if (rte_intr_enable(&dev->pci_dev->intr_handle) < 0)
 			PMD_DRV_LOG(ERR, "interrupt enable failed");
@@ -1058,6 +1079,13 @@ eth_virtio_dev_init(struct rte_eth_dev *eth_dev)
 		if (vtpci_init(eth_dev, hw) < 0)
 			return -1;
 	}
+#ifdef RTE_VIRTIO_VDEV_QTEST
+	else if (virtio_dev_check(eth_dev,
+				RTE_ETH_DEV_VIRTUAL, QTEST_DRV_NAME, 0)) {
+		if (vtpci_init(eth_dev, hw) < 0)
+			return -1;
+	}
+#endif
 
 	/* Reset the device although not necessary at startup */
 	vtpci_reset(hw);
@@ -1077,6 +1105,13 @@ eth_virtio_dev_init(struct rte_eth_dev *eth_dev)
 
 		rte_eth_copy_pci_info(eth_dev, pci_dev);
 	}
+#ifdef RTE_VIRTIO_VDEV_QTEST
+	else if (virtio_dev_check(eth_dev,
+				RTE_ETH_DEV_VIRTUAL, QTEST_DRV_NAME, 0)) {
+		if (!vtpci_with_feature(hw, VIRTIO_NET_F_STATUS))
+			pci_dev->driver->drv_flags &= ~RTE_PCI_DRV_INTR_LSC;
+	}
+#endif
 
 	rx_func_get(eth_dev);
 
@@ -1165,6 +1200,26 @@ eth_virtio_dev_init(struct rte_eth_dev *eth_dev)
 						   virtio_interrupt_handler,
 						   eth_dev);
 	}
+#ifdef RTE_VIRTIO_VDEV_QTEST
+	else if (virtio_dev_check(eth_dev, RTE_ETH_DEV_VIRTUAL,
+				QTEST_DRV_NAME, 0)) {
+		struct rte_pci_id id;
+
+		id = qtest_get_pci_id_of_virtio_net();
+		RTE_SET_USED(id);
+
+		PMD_INIT_LOG(DEBUG, "port %d vendorID=0x%x deviceID=0x%x",
+				eth_dev->data->port_id,
+				id.vendor_id, id.device_id);
+
+		/* Setup interrupt callback  */
+		if (virtio_dev_check(eth_dev, RTE_ETH_DEV_VIRTUAL,
+					NULL, RTE_ETH_DEV_INTR_LSC))
+			qtest_intr_callback_register(hw->qsession,
+					virtio_interrupt_handler, eth_dev);
+	}
+#endif
+
 	virtio_dev_cq_start(eth_dev);
 
 	return 0;
@@ -1202,7 +1257,15 @@ eth_virtio_dev_uninit(struct rte_eth_dev *eth_dev)
 					     virtio_interrupt_handler,
 					     eth_dev);
 
-	rte_eal_pci_unmap_device(pci_dev);
+#ifdef RTE_VIRTIO_VDEV_QTEST
+	else if (virtio_dev_check(eth_dev, RTE_ETH_DEV_VIRTUAL,
+				QTEST_DRV_NAME, RTE_ETH_DEV_INTR_LSC))
+		qtest_intr_callback_unregister(hw->qsession,
+				virtio_interrupt_handler, eth_dev);
+#endif
+
+	if (virtio_dev_check(eth_dev, RTE_ETH_DEV_PCI, NULL, 0))
+		rte_eal_pci_unmap_device(pci_dev);
 
 	PMD_INIT_LOG(DEBUG, "dev_uninit completed");
 
@@ -1284,16 +1347,34 @@ virtio_dev_start(struct rte_eth_dev *dev)
 
 	/* check if lsc interrupt feature is enabled */
 	if (dev->data->dev_conf.intr_conf.lsc) {
-		if (!virtio_dev_check(dev, RTE_ETH_DEV_PCI,
-					NULL, RTE_PCI_DRV_INTR_LSC)) {
+		int pdev_has_lsc = 0, vdev_has_lsc = 0;
+
+		pdev_has_lsc = virtio_dev_check(dev, RTE_ETH_DEV_PCI,
+				NULL, RTE_PCI_DRV_INTR_LSC);
+#ifdef RTE_VIRTIO_VDEV_QTEST
+		vdev_has_lsc = virtio_dev_check(dev, RTE_ETH_DEV_VIRTUAL,
+				QTEST_DRV_NAME, RTE_ETH_DEV_INTR_LSC);
+#endif
+
+		if ((!pdev_has_lsc) && (!vdev_has_lsc)) {
 			PMD_DRV_LOG(ERR, "link status not supported by host");
 			return -ENOTSUP;
 		}
 
-		if (rte_intr_enable(&dev->pci_dev->intr_handle) < 0) {
-			PMD_DRV_LOG(ERR, "interrupt enable failed");
-			return -EIO;
+		if (pdev_has_lsc) {
+			if (rte_intr_enable(&dev->pci_dev->intr_handle) < 0) {
+				PMD_DRV_LOG(ERR, "interrupt enable failed");
+				return -EIO;
+			}
 		}
+#ifdef RTE_VIRTIO_VDEV_QTEST
+		else if (vdev_has_lsc) {
+			if (qtest_intr_enable(hw->qsession) < 0) {
+				PMD_DRV_LOG(ERR, "interrupt enable failed");
+				return -EIO;
+			}
+		}
+#endif
 	}
 
 	/* Initialize Link state */
@@ -1387,11 +1468,20 @@ static void
 virtio_dev_stop(struct rte_eth_dev *dev)
 {
 	struct rte_eth_link link;
+	struct virtio_hw *hw = dev->data->dev_private;
 
 	PMD_INIT_LOG(DEBUG, "stop");
+	RTE_SET_USED(hw);
 
-	if (dev->data->dev_conf.intr_conf.lsc)
-		rte_intr_disable(&dev->pci_dev->intr_handle);
+	if (dev->data->dev_conf.intr_conf.lsc) {
+		if (virtio_dev_check(dev, RTE_ETH_DEV_PCI, NULL, 0))
+			rte_intr_disable(&dev->pci_dev->intr_handle);
+#ifdef RTE_VIRTIO_VDEV_QTEST
+		else if (virtio_dev_check(dev, RTE_ETH_DEV_VIRTUAL,
+					QTEST_DRV_NAME, 0))
+			qtest_intr_disable(hw->qsession);
+#endif
+	}
 
 	memset(&link, 0, sizeof(link));
 	virtio_dev_atomic_write_link_status(dev, &link);
@@ -1628,3 +1718,348 @@ static struct rte_driver rte_cvio_driver = {
 PMD_REGISTER_DRIVER(rte_cvio_driver);
 
 #endif
+
+#ifdef RTE_VIRTIO_VDEV_QTEST
+
+#define ETH_VIRTIO_NET_ARG_QTEST_PATH           "qtest"
+#define ETH_VIRTIO_NET_ARG_IVSHMEM_PATH         "ivshmem"
+#define ETH_VIRTIO_NET_ARG_VIRTIO_NET_ADDR      "virtio-net-addr"
+#define ETH_VIRTIO_NET_ARG_IVSHMEM_ADDR         "ivshmem-addr"
+#define ETH_VIRTIO_NET_ARG_PIIX3_ADDR           "piix3-addr"
+
+static const char *valid_qtest_args[] = {
+	ETH_VIRTIO_NET_ARG_QTEST_PATH,
+	ETH_VIRTIO_NET_ARG_IVSHMEM_PATH,
+	ETH_VIRTIO_NET_ARG_VIRTIO_NET_ADDR,
+	ETH_VIRTIO_NET_ARG_IVSHMEM_ADDR,
+	ETH_VIRTIO_NET_ARG_PIIX3_ADDR,
+	NULL
+};
+
+static int
+get_socket_path_arg(const char *key __rte_unused,
+		const char *value, void *extra_args)
+{
+	char **p;
+
+	if ((value == NULL) || (extra_args == NULL))
+		return -EINVAL;
+
+	p = extra_args;
+	*p = strdup(value);
+
+	if (*p == NULL)
+		return -1;
+
+	return 0;
+}
+
+static int
+get_pci_addr_arg(const char *key __rte_unused,
+		const char *value, void *extra_args)
+{
+	struct rte_pci_addr *addr = extra_args;
+
+	if ((value == NULL) || (extra_args == NULL))
+		return -EINVAL;
+
+	if (eal_parse_pci_DomBDF(value, addr) != 0)
+		return -1;
+
+	if (addr->domain != 0)
+		return -1;
+
+	return 0;
+}
+
+static int
+virtio_net_eth_dev_free(struct rte_eth_dev *eth_dev)
+{
+	struct virtio_hw *hw;
+	int ret;
+
+	ret = rte_eth_dev_release_port(eth_dev);
+	if (ret < 0) {
+		PMD_INIT_LOG(ERR, "cannot release a port\n");
+		return -1;
+	}
+
+	hw = eth_dev->data->dev_private;
+	rte_free(hw);
+
+	return 0;
+}
+
+static struct rte_eth_dev *
+virtio_net_eth_dev_alloc(const char *name)
+{
+	struct rte_eth_dev *eth_dev;
+	struct rte_eth_dev_data *data;
+	struct virtio_hw *hw;
+	int ret;
+
+	eth_dev = rte_eth_dev_allocate(name, RTE_ETH_DEV_VIRTUAL);
+	if (eth_dev == NULL) {
+		PMD_INIT_LOG(ERR, "cannot alloc a port\n");
+		return NULL;
+	}
+
+	data = eth_dev->data;
+
+	hw = rte_zmalloc(NULL, sizeof(*hw), 0);
+	if (hw == NULL) {
+		PMD_INIT_LOG(ERR, "malloc virtio_hw failed\n");
+		ret = rte_eth_dev_release_port(eth_dev);
+		if (ret < 0)
+			rte_panic("cannot release a port");
+		return NULL;
+	}
+
+	data->dev_private = hw;
+	eth_dev->driver = &rte_virtio_pmd;
+	return eth_dev;
+}
+
+static int
+virtio_net_eth_pmd_parse_socket_path(struct rte_kvargs *kvlist,
+		const char *option, char **path)
+{
+	int ret;
+
+	if (rte_kvargs_count(kvlist, option) == 1) {
+		ret = rte_kvargs_process(kvlist, option,
+				&get_socket_path_arg, path);
+		if (ret != 0) {
+			PMD_INIT_LOG(ERR,
+					"Failed to connect to %s socket", option);
+			return -1;
+		}
+	} else {
+		PMD_INIT_LOG(ERR, "No argument specified for %s", option);
+		return -1;
+	}
+
+	return 0;
+}
+
+static int
+virtio_net_eth_pmd_parse_pci_addr(struct rte_kvargs *kvlist,
+		const char *option, struct rte_pci_addr *addr,
+		struct rte_pci_addr *default_addr)
+{
+	int ret;
+
+	if (rte_kvargs_count(kvlist, option) == 1) {
+		ret = rte_kvargs_process(kvlist, option,
+				&get_pci_addr_arg, addr);
+		if (ret != 0) {
+			PMD_INIT_LOG(ERR,
+					"Specified invalid address in '%s'", option);
+			return -1;
+		}
+	} else
+		/* copy default pci address */
+		*addr = *default_addr;
+
+	return 0;
+}
+
+static int
+virtio_prepare_target_devices(struct qtest_pci_device *devices,
+			struct rte_kvargs *kvlist)
+{
+	struct qtest_pci_device *virtio_net, *ivshmem, *piix3;
+	struct rte_pci_addr default_addr;
+	const struct rte_memseg *ms;
+	int ret;
+
+	ms = rte_eal_get_physmem_layout();
+	/* if EAL memory size isn't pow of 2, ivshmem will refuse it */
+	if ((ms[0].len & (ms[0].len - 1)) != 0) {
+		PMD_DRV_LOG(ERR, "memory size must be power of 2\n");
+		return -1;
+	}
+
+	virtio_net = &devices[0];
+	ivshmem = &devices[1];
+	piix3 = &devices[2];
+
+	virtio_net->name = "virtio-net";
+	virtio_net->device_id = VIRTIO_NET_DEVICE_ID;
+	virtio_net->vendor_id = VIRTIO_NET_VENDOR_ID;
+	virtio_net->init = qtest_init_pci_device;
+	virtio_net->bar[0].addr = PCI_BASE_ADDRESS_0;
+	virtio_net->bar[0].type = QTEST_PCI_BAR_IO;
+	virtio_net->bar[0].region_start = VIRTIO_NET_IO_START;
+	virtio_net->bar[1].addr = PCI_BASE_ADDRESS_1;
+	virtio_net->bar[1].type = QTEST_PCI_BAR_MEMORY_32;
+	virtio_net->bar[1].region_start = VIRTIO_NET_MEMORY1_START;
+	virtio_net->bar[4].addr = PCI_BASE_ADDRESS_4;
+	virtio_net->bar[4].type = QTEST_PCI_BAR_MEMORY_64;
+	virtio_net->bar[4].region_start = VIRTIO_NET_MEMORY2_START;
+
+	ivshmem->name = "ivshmem";
+	ivshmem->device_id = IVSHMEM_DEVICE_ID;
+	ivshmem->vendor_id = IVSHMEM_VENDOR_ID;
+	ivshmem->init = qtest_init_pci_device;
+	ivshmem->bar[0].addr = PCI_BASE_ADDRESS_0;
+	ivshmem->bar[0].type = QTEST_PCI_BAR_MEMORY_32;
+	ivshmem->bar[0].region_start = IVSHMEM_MEMORY_START;
+	ivshmem->bar[2].addr = PCI_BASE_ADDRESS_2;
+	ivshmem->bar[2].type = QTEST_PCI_BAR_MEMORY_64;
+	/* In host mode, only one memory segment is vaild */
+	ivshmem->bar[2].region_start = (uint64_t)ms[0].addr;
+
+	/* piix3 is needed to route irqs from virtio-net to ioapic */
+	piix3->name = "piix3";
+	piix3->device_id = PIIX3_DEVICE_ID;
+	piix3->vendor_id = PIIX3_VENDOR_ID;
+	piix3->init = qtest_init_piix3_device;
+
+	/*
+	 * Set pci addresses specified by command line.
+	 * QTest utils will only check specified pci address.
+	 * If it's wrong, a target device won't be found.
+	 */
+	default_addr.domain = 0;
+	default_addr.bus = 0;
+	default_addr.function = 0;
+
+	default_addr.devid = 3;
+	ret = virtio_net_eth_pmd_parse_pci_addr(kvlist,
+			ETH_VIRTIO_NET_ARG_VIRTIO_NET_ADDR,
+			&virtio_net->specified_addr, &default_addr);
+	if (ret < 0)
+		return -1;
+
+	default_addr.devid = 4;
+	ret = virtio_net_eth_pmd_parse_pci_addr(kvlist,
+			ETH_VIRTIO_NET_ARG_IVSHMEM_ADDR,
+			&ivshmem->specified_addr, &default_addr);
+	if (ret < 0)
+		return -1;
+
+	default_addr.devid = 1;
+	ret = virtio_net_eth_pmd_parse_pci_addr(kvlist,
+			ETH_VIRTIO_NET_ARG_PIIX3_ADDR,
+			&piix3->specified_addr, &default_addr);
+	if (ret < 0)
+		return -1;
+
+	return 0;
+}
+/*
+ * Initialization when "CONFIG_RTE_VIRTIO_VDEV_QTEST" is enabled.
+ */
+static int
+rte_qtest_virtio_pmd_init(const char *name, const char *params)
+{
+	struct rte_kvargs *kvlist;
+	struct virtio_hw *hw = NULL;
+	struct rte_eth_dev *eth_dev = NULL;
+	char *qtest_path = NULL, *ivshmem_path = NULL;
+	struct qtest_pci_device devices[QTEST_DEVICE_NUM];
+	int ret;
+
+	if (params == NULL || params[0] == '\0')
+		return -EINVAL;
+
+	kvlist = rte_kvargs_parse(params, valid_qtest_args);
+	if (kvlist == NULL) {
+		PMD_INIT_LOG(ERR, "error when parsing param");
+		return -EFAULT;
+	}
+
+	ret = virtio_net_eth_pmd_parse_socket_path(kvlist,
+			ETH_VIRTIO_NET_ARG_IVSHMEM_PATH, &ivshmem_path);
+	if (ret < 0)
+		goto error;
+
+	ret = virtio_net_eth_pmd_parse_socket_path(kvlist,
+			ETH_VIRTIO_NET_ARG_QTEST_PATH, &qtest_path);
+	if (ret < 0)
+		goto error;
+
+	ret = virtio_prepare_target_devices(devices, kvlist);
+	if (ret < 0)
+		goto error;
+
+	eth_dev = virtio_net_eth_dev_alloc(name);
+	if (eth_dev == NULL)
+		goto error;
+
+	hw = eth_dev->data->dev_private;
+	hw->qsession = qtest_vdev_init(qtest_path, ivshmem_path,
+			VIRTIO_NET_IRQ_NUM, devices, QTEST_DEVICE_NUM);
+	if (hw->qsession == NULL)
+		goto error;
+
+	/* originally, this will be called in rte_eal_pci_probe() */
+	ret = eth_virtio_dev_init(eth_dev);
+	if (ret < 0)
+		goto error;
+
+	eth_dev->driver = NULL;
+	eth_dev->data->dev_flags |= RTE_ETH_DEV_DETACHABLE;
+	eth_dev->data->kdrv = RTE_KDRV_NONE;
+	eth_dev->data->drv_name = QTEST_DRV_NAME;
+
+	free(qtest_path);
+	free(ivshmem_path);
+	rte_kvargs_free(kvlist);
+	return 0;
+
+error:
+	if (hw != NULL && hw->qsession != NULL)
+		qtest_vdev_uninit(hw->qsession);
+	if (eth_dev)
+		virtio_net_eth_dev_free(eth_dev);
+	if (qtest_path)
+		free(qtest_path);
+	if (ivshmem_path)
+		free(ivshmem_path);
+	rte_kvargs_free(kvlist);
+	return -EFAULT;
+}
+
+/*
+ * Finalization when "CONFIG_RTE_VIRTIO_VDEV_QTEST" is enabled.
+ */
+static int
+rte_qtest_virtio_pmd_uninit(const char *name)
+{
+	struct rte_eth_dev *eth_dev;
+	struct virtio_hw *hw;
+	int ret;
+
+	if (name == NULL)
+		return -EINVAL;
+
+	/* find the ethdev entry */
+	eth_dev = rte_eth_dev_allocated(name);
+	if (eth_dev == NULL)
+		return -ENODEV;
+
+	ret = eth_virtio_dev_uninit(eth_dev);
+	if (ret != 0)
+		return -EFAULT;
+
+	hw = eth_dev->data->dev_private;
+	qtest_vdev_uninit(hw->qsession);
+
+	ret = virtio_net_eth_dev_free(eth_dev);
+	if (ret != 0)
+		return -EFAULT;
+
+	return 0;
+}
+
+static struct rte_driver rte_qtest_virtio_driver = {
+	.name   = QTEST_DRV_NAME,
+	.type   = PMD_VDEV,
+	.init   = rte_qtest_virtio_pmd_init,
+	.uninit = rte_qtest_virtio_pmd_uninit,
+};
+
+PMD_REGISTER_DRIVER(rte_qtest_virtio_driver);
+#endif /* RTE_VIRTIO_VDEV_QTEST */
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 120+ messages in thread

* [PATCH v4 12/12] docs: add release note for qtest virtio container support
  2016-02-22  8:17       ` [PATCH v3 3/6] EAL: Add new EAL "--range-virtaddr" option Tetsuya Mukawa
                           ` (12 preceding siblings ...)
  2016-03-09  8:33         ` [PATCH v4 11/12] virtio: Add QTest support for virtio-net PMD Tetsuya Mukawa
@ 2016-03-09  8:33         ` Tetsuya Mukawa
  13 siblings, 0 replies; 120+ messages in thread
From: Tetsuya Mukawa @ 2016-03-09  8:33 UTC (permalink / raw)
  To: dev, jianfeng.tan, huawei.xie, yuanhan.liu

Signed-off-by: Tetsuya Mukawa <mukawa@igel.co.jp>
---
 doc/guides/rel_notes/release_16_04.rst | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/doc/guides/rel_notes/release_16_04.rst b/doc/guides/rel_notes/release_16_04.rst
index e3142f2..1c8c6b2 100644
--- a/doc/guides/rel_notes/release_16_04.rst
+++ b/doc/guides/rel_notes/release_16_04.rst
@@ -61,6 +61,9 @@ This section should contain new features added in this release. Sample format:
 
   Add a new virtual device, named eth_cvio, to support virtio for containers.
 
+* **Virtio support for containers using QEMU qtest mode.**
+  Add a new virtual device, named eth_qtest_virtio, to support virtio for containers
+  using QEMU qtest mode.
 
 Resolved Issues
 ---------------
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 120+ messages in thread

* [PATCH v5 0/6] Virtio-net PMD: QEMU QTest extension for container
  2016-03-09  8:33         ` [PATCH v4 11/12] virtio: Add QTest support for virtio-net PMD Tetsuya Mukawa
@ 2016-06-02  3:29           ` Tetsuya Mukawa
  2016-06-02  7:31             ` Yuanhan Liu
  2016-06-02  3:29           ` [PATCH v5 1/6] virtio, qtest: Add QTest utility basic functions Tetsuya Mukawa
                             ` (8 subsequent siblings)
  9 siblings, 1 reply; 120+ messages in thread
From: Tetsuya Mukawa @ 2016-06-02  3:29 UTC (permalink / raw)
  To: dev; +Cc: yuanhan.liu, jianfeng.tan, huawei.xie, Tetsuya Mukawa

The patches will work on below patch series.
 - [PATCH v5 0/8] virtio support for container

It seems his implementation will be changed a bit.
So, this patch series are also going to be changed to follow his implementation.


[Changes]
v5 changes:
 - Rebase on latest dpdk-next-virtio.
 - Follow Jianfeng's implementation to support virtual virtio-net device.
 - Split the patch series like followings.
   - This patch series.
     Only support basic functions.
     The functions to handle LSC interrupt and '--range-virtaddr' was
     removed from this patch series.
     This patch needs EAL memory mapped between (1<<31) to (1<<44).
     To allocate such a memory, just assume the user will use '--base-virtaddr'.
     If appropriate memory cannot be allocated, this PMD will exit as error.
     Then the users can try other values.
  - Supplement patches to support link status interrupt.
  - Supplement patches to support '--range-virtaddr'.
    This EAL option will help to allocate memory mapped between (1<<31) to
    (1<<44).

v4 changes:
 - Rebase on latest master.
 - Split patches.
 - To abstract qtest code more, change interface between current virtio
   code and qtest code.
 - Rename qtest.c to qtest_utils.c
 - Change implementation like below.
   - Set pci device information out of qtest abstraction, then pass it to
     qtest to initialize devices.
 - Remove redundant condition checking from qtest_raw_send/recv().
 - Fix return value of qtest_raw_send().

v3 changes:
 - Rebase on latest master.
 - remove "-qtest-virtio" option, then add "--range-virtaddr" and
   "--align-memsize" options.
 - Fix typos in qtest.c

v2 changes:
 - Rebase on above patch seiries.
 - Rebase on master
 - Add "--qtest-virtio" EAL option.
 - Fixes in qtest.c
  - Fix error handling for the case qtest connection is closed.
  - Use eventfd for interrupt messaging.
  - Use linux header for PCI register definitions.
  - Fix qtest_raw_send/recv to handle error correctly.
  - Fix bit mask of PCI_CONFIG_ADDR.
  - Describe memory and ioport usage of qtest guest in qtest.c
  - Remove loop that is for finding PCI devices.


[Abstraction]

Normally, virtio-net PMD only works on VM, because there is no virtio-net device on host.
This patches extend  virtio-net PMD to be able to work on host as virtual PMD.
But we didn't implement virtio-net device as a part of virtio-net PMD.
To prepare virtio-net device for the PMD, start QEMU process with special QTest mode, then connect it from virtio-net PMD through unix domain socket.

The PMD can connect to anywhere QEMU virtio-net device can.
For example, the PMD can connects to vhost-net kernel module and vhost-user backend application.
Similar to virtio-net PMD on QEMU, application memory that uses virtio-net PMD will be shared between vhost backend application.
But vhost backend application memory will not be shared.

Main target of this PMD is container like docker, rkt, lxc and etc.
We can isolate related processes(virtio-net PMD process, QEMU and vhost-user backend process) by container.
But, to communicate through unix domain socket, shared directory will be needed.


[How to use]

 Please use QEMU-2.5.1, or above.
 (So far, QEMU-2.5.1 hasn't been released yet, so please checkout master from QEMU repository)

 - Compile
 Set "CONFIG_RTE_VIRTIO_VDEV_QTEST=y" in config/common_linux.
 Then compile it.

 - Start QEMU like below.
 $ qemu-system-x86_64 \
              -machine pc-i440fx-1.4,accel=qtest \
              -display none -qtest-log /dev/null \
              -qtest unix:/tmp/socket,server \
              -netdev type=tap,script=/etc/qemu-ifup,id=net0,queues=1 \
              -device virtio-net-pci,netdev=net0,mq=on,disable-modern=false,addr=3 \
              -chardev socket,id=chr1,path=/tmp/ivshmem,server \
              -device ivshmem,size=1G,chardev=chr1,vectors=1,addr=4

 - Start DPDK application like below
 $ testpmd -c f -n 1 -m 1024 --no-pci --base-virtaddr=0x400000000 \
             --vdev="eth_virtio_qtest0,qtest=/tmp/socket,ivshmem=/tmp/ivshmem"\
             -- --disable-hw-vlan --txqflags=0xf00 -i

(*1) Please Specify same memory size in QEMU and DPDK command line.
(*2) Should use qemu-2.5.1, or above.
(*3) QEMU process is needed per port.
(*4) virtio-1.0 device are only supported.
(*5) The vhost backends like vhost-net and vhost-user can be specified.
(*6) In most cases, just using above command is enough, but you can also
     specify other QEMU virtio-net options.
(*7) Only checked "pc-i440fx-1.4" machine, but may work with other
     machines. It depends on a machine has piix3 south bridge.
     If the machine doesn't have, virtio-net PMD cannot receive status
     changed interrupts.
(*8) Should not add "--enable-kvm" to QEMU command line.


[Detailed Description]

 - virtio-net device implementation
The PMD uses QEMU virtio-net device. To do that, QEMU QTest functionality is used.
QTest is a test framework of QEMU devices. It allows us to implement a device driver outside of QEMU.
With QTest, we can implement DPDK application and virtio-net PMD as standalone process on host.
When QEMU is invoked as QTest mode, any guest code will not run.
To know more about QTest, see below.
http://wiki.qemu.org/Features/QTest

 - probing devices
QTest provides a unix domain socket. Through this socket, driver process can access to I/O port and memory of QEMU virtual machine.
The PMD will send I/O port accesses to probe pci devices.
If we can find virtio-net and ivshmem device, initialize the devices.
Also, I/O port accesses of virtio-net PMD will be sent through socket, and virtio-net PMD can initialize vitio-net device on QEMU correctly.

 - ivshmem device to share memory
To share memory that virtio-net PMD process uses, ivshmem device will be used.
Because ivshmem device can only handle one file descriptor, shared memory should be consist of one file.
To allocate such a memory, EAL has new option called "--single-file".
Also, the hugepages should be mapped between "1 << 31" to "1 << 44".
To map like above, use '--base-virtaddr' option.
While initializing ivshmem device, we can set BAR(Base Address Register).
It represents which memory QEMU vcpu can access to this shared memory.
We will specify host virtual address of shared memory as this address.
It is very useful because we don't need to apply patch to QEMU to calculate address offset.
(For example, if virtio-net PMD process will allocate memory from shared memory, then specify the virtual address of it to virtio-net register, QEMU virtio-net device can understand it without calculating address offset.)



Tetsuya Mukawa (6):
  virtio, qtest: Add QTest utility basic functions
  virtio, qtest: Add pci device initialization function to qtest utils
  virtio, qtest: Add functionality to share memory between QTest guest
  virtio, qtest: Add misc functions to handle pci information
  virtio: Add QTest support to vtpci abstraction
  virtio: Add QTest support for virtio-net PMD

 config/common_linuxapp                             |    2 +
 drivers/net/virtio/Makefile                        |    6 +
 drivers/net/virtio/virtio_ethdev.c                 |    3 +-
 drivers/net/virtio/virtio_ethdev.h                 |    1 +
 drivers/net/virtio/virtio_qtest/qtest.h            |   95 ++
 drivers/net/virtio/virtio_qtest/qtest_utils.c      | 1087 ++++++++++++++++++++
 drivers/net/virtio/virtio_qtest/qtest_utils.h      |  289 ++++++
 drivers/net/virtio/virtio_qtest/virtio_qtest_dev.c |  393 +++++++
 drivers/net/virtio/virtio_qtest/virtio_qtest_dev.h |   42 +
 drivers/net/virtio/virtio_qtest/virtio_qtest_pci.c |  407 ++++++++
 drivers/net/virtio/virtio_qtest/virtio_qtest_pci.h |   39 +
 drivers/net/virtio/virtqueue.h                     |    6 +-
 12 files changed, 2365 insertions(+), 5 deletions(-)
 create mode 100644 drivers/net/virtio/virtio_qtest/qtest.h
 create mode 100644 drivers/net/virtio/virtio_qtest/qtest_utils.c
 create mode 100644 drivers/net/virtio/virtio_qtest/qtest_utils.h
 create mode 100644 drivers/net/virtio/virtio_qtest/virtio_qtest_dev.c
 create mode 100644 drivers/net/virtio/virtio_qtest/virtio_qtest_dev.h
 create mode 100644 drivers/net/virtio/virtio_qtest/virtio_qtest_pci.c
 create mode 100644 drivers/net/virtio/virtio_qtest/virtio_qtest_pci.h

-- 
2.7.4

^ permalink raw reply	[flat|nested] 120+ messages in thread

* [PATCH v5 1/6] virtio, qtest: Add QTest utility basic functions
  2016-03-09  8:33         ` [PATCH v4 11/12] virtio: Add QTest support for virtio-net PMD Tetsuya Mukawa
  2016-06-02  3:29           ` [PATCH v5 0/6] Virtio-net PMD: QEMU QTest extension for container Tetsuya Mukawa
@ 2016-06-02  3:29           ` Tetsuya Mukawa
  2016-06-02  3:29           ` [PATCH v5 2/6] virtio, qtest: Add pci device initialization function to qtest utils Tetsuya Mukawa
                             ` (7 subsequent siblings)
  9 siblings, 0 replies; 120+ messages in thread
From: Tetsuya Mukawa @ 2016-06-02  3:29 UTC (permalink / raw)
  To: dev; +Cc: yuanhan.liu, jianfeng.tan, huawei.xie, Tetsuya Mukawa

The patch adds basic functions for accessing to QEMU quest that runs in
QTest mode. The functions will be used by virtio container extension
that can access to the above guest.

Signed-off-by: Tetsuya Mukawa <mukawa@igel.co.jp>
---
 config/common_linuxapp                        |   2 +
 drivers/net/virtio/Makefile                   |   4 +
 drivers/net/virtio/virtio_qtest/qtest_utils.c | 480 ++++++++++++++++++++++++++
 drivers/net/virtio/virtio_qtest/qtest_utils.h | 119 +++++++
 4 files changed, 605 insertions(+)
 create mode 100644 drivers/net/virtio/virtio_qtest/qtest_utils.c
 create mode 100644 drivers/net/virtio/virtio_qtest/qtest_utils.h

diff --git a/config/common_linuxapp b/config/common_linuxapp
index 946a6d4..3bf6237 100644
--- a/config/common_linuxapp
+++ b/config/common_linuxapp
@@ -46,3 +46,5 @@ CONFIG_RTE_LIBRTE_POWER=y
 
 # Enable virtio-user
 CONFIG_RTE_VIRTIO_VDEV=y
+# Enable virtio-qtest
+CONFIG_RTE_VIRTIO_QTEST=y
diff --git a/drivers/net/virtio/Makefile b/drivers/net/virtio/Makefile
index 13b2b75..0b1ccff 100644
--- a/drivers/net/virtio/Makefile
+++ b/drivers/net/virtio/Makefile
@@ -63,6 +63,10 @@ SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_user/virtio_user_dev.c
 SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_user/virtio_user_pci.c
 endif
 
+ifeq ($(CONFIG_RTE_VIRTIO_QTEST),y)
+SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_qtest/qtest_utils.c
+endif
+
 # this lib depends upon:
 DEPDIRS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += lib/librte_eal lib/librte_ether
 DEPDIRS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += lib/librte_mempool lib/librte_mbuf
diff --git a/drivers/net/virtio/virtio_qtest/qtest_utils.c b/drivers/net/virtio/virtio_qtest/qtest_utils.c
new file mode 100644
index 0000000..3ad8f9e
--- /dev/null
+++ b/drivers/net/virtio/virtio_qtest/qtest_utils.c
@@ -0,0 +1,480 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2016 IGEL Co., Ltd. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of IGEL Co., Ltd. nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <unistd.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <pthread.h>
+#include <fcntl.h>
+
+#include <rte_malloc.h>
+
+#include "../virtio_logs.h"
+#include "../virtio_ethdev.h"
+#include "qtest_utils.h"
+
+union qtest_pipefds {
+	struct {
+		int pipefd[2];
+	};
+	struct {
+		int readfd;
+		int writefd;
+	};
+};
+
+struct qtest_session {
+	int qtest_socket;
+	pthread_mutex_t qtest_session_lock;
+
+	pthread_t event_th;
+	int event_th_started;
+	char *evq;
+	char *evq_dequeue_ptr;
+	size_t evq_total_len;
+
+	union qtest_pipefds msgfds;
+};
+
+static int
+qtest_raw_send(int fd, char *buf, size_t count)
+{
+	size_t len = count;
+	size_t total_len = 0;
+	int ret = 0;
+
+	while (len > 0) {
+		ret = write(fd, buf, len);
+		if (ret == -1) {
+			if (errno == EINTR)
+				continue;
+			return ret;
+		}
+		if (ret == (int)len)
+			break;
+		total_len += ret;
+		buf += ret;
+		len -= ret;
+	}
+	return total_len + ret;
+}
+
+static int
+qtest_raw_recv(int fd, char *buf, size_t count)
+{
+	size_t len = count;
+	size_t total_len = 0;
+	int ret = 0;
+
+	while (len > 0) {
+		ret = read(fd, buf, len);
+		if (ret <= 0) {
+			if (errno == EINTR) {
+				continue;
+			}
+			return ret;
+		}
+		if (ret == (int)len)
+			break;
+		if (*(buf + ret - 1) == '\n')
+			break;
+		total_len += ret;
+		buf += ret;
+		len -= ret;
+	}
+	return total_len + ret;
+}
+
+/*
+ * To know QTest protocol specification, see below QEMU source code.
+ *  - qemu/qtest.c
+ * If qtest socket is closed, qtest_raw_in and qtest_raw_read will return 0.
+ */
+static uint32_t
+qtest_raw_in(struct qtest_session *s, uint16_t addr, char type)
+{
+	char buf[64];
+	int ret;
+
+	if ((type != 'l') && (type != 'w') && (type != 'b'))
+		rte_panic("Invalid value\n");
+
+	snprintf(buf, sizeof(buf), "in%c 0x%x\n", type, addr);
+	/* write to qtest socket */
+	ret = qtest_raw_send(s->qtest_socket, buf, strlen(buf));
+	/* read reply from event handler */
+	ret = qtest_raw_recv(s->msgfds.readfd, buf, sizeof(buf));
+	if (ret < 0)
+		return 0;
+
+	buf[ret] = '\0';
+	return strtoul(buf + strlen("OK "), NULL, 16);
+}
+
+static void
+qtest_raw_out(struct qtest_session *s, uint16_t addr, uint32_t val, char type)
+{
+	char buf[64];
+	int ret __rte_unused;
+
+	if ((type != 'l') && (type != 'w') && (type != 'b'))
+		rte_panic("Invalid value\n");
+
+	snprintf(buf, sizeof(buf), "out%c 0x%x 0x%x\n", type, addr, val);
+	/* write to qtest socket */
+	ret = qtest_raw_send(s->qtest_socket, buf, strlen(buf));
+	/* read reply from event handler */
+	ret = qtest_raw_recv(s->msgfds.readfd, buf, sizeof(buf));
+}
+
+static uint32_t
+qtest_raw_read(struct qtest_session *s, uint64_t addr, char type)
+{
+	char buf[64];
+	int ret;
+
+	if ((type != 'l') && (type != 'w') && (type != 'b'))
+		rte_panic("Invalid value\n");
+
+	snprintf(buf, sizeof(buf), "read%c 0x%lx\n", type, addr);
+	/* write to qtest socket */
+	ret = qtest_raw_send(s->qtest_socket, buf, strlen(buf));
+	/* read reply from event handler */
+	ret = qtest_raw_recv(s->msgfds.readfd, buf, sizeof(buf));
+	if (ret < 0)
+		return 0;
+
+	buf[ret] = '\0';
+	return strtoul(buf + strlen("OK "), NULL, 16);
+}
+
+static void
+qtest_raw_write(struct qtest_session *s, uint64_t addr, uint32_t val, char type)
+{
+	char buf[64];
+	int ret __rte_unused;
+
+	if ((type != 'l') && (type != 'w') && (type != 'b'))
+		rte_panic("Invalid value\n");
+
+	snprintf(buf, sizeof(buf), "write%c 0x%lx 0x%x\n", type, addr, val);
+	/* write to qtest socket */
+	ret = qtest_raw_send(s->qtest_socket, buf, strlen(buf));
+	/* read reply from event handler */
+	ret = qtest_raw_recv(s->msgfds.readfd, buf, sizeof(buf));
+}
+
+/*
+ * qtest_in/out are used for accessing ioport of qemu guest.
+ * qtest_read/write are used for accessing memory of qemu guest.
+ */
+uint32_t
+qtest_in(struct qtest_session *s, uint16_t addr, char type)
+{
+	uint32_t val;
+
+	if (pthread_mutex_lock(&s->qtest_session_lock) < 0)
+		rte_panic("Cannot lock mutex\n");
+
+	val = qtest_raw_in(s, addr, type);
+
+	if (pthread_mutex_unlock(&s->qtest_session_lock) < 0)
+		rte_panic("Cannot lock mutex\n");
+
+	return val;
+}
+
+void
+qtest_out(struct qtest_session *s, uint16_t addr, uint64_t val, char type)
+{
+	if (pthread_mutex_lock(&s->qtest_session_lock) < 0)
+		rte_panic("Cannot lock mutex\n");
+
+	qtest_raw_out(s, addr, val, type);
+
+	if (pthread_mutex_unlock(&s->qtest_session_lock) < 0)
+		rte_panic("Cannot lock mutex\n");
+}
+
+uint32_t
+qtest_read(struct qtest_session *s, uint64_t addr, char type)
+{
+	uint32_t val;
+
+	if (pthread_mutex_lock(&s->qtest_session_lock) < 0)
+		rte_panic("Cannot lock mutex\n");
+
+	val = qtest_raw_read(s, addr, type);
+
+	if (pthread_mutex_unlock(&s->qtest_session_lock) < 0)
+		rte_panic("Cannot lock mutex\n");
+
+	return val;
+}
+
+void
+qtest_write(struct qtest_session *s, uint64_t addr, uint64_t val, char type)
+{
+	if (pthread_mutex_lock(&s->qtest_session_lock) < 0)
+		rte_panic("Cannot lock mutex\n");
+
+	qtest_raw_write(s, addr, val, type);
+
+	if (pthread_mutex_unlock(&s->qtest_session_lock) < 0)
+		rte_panic("Cannot lock mutex\n");
+}
+
+static void
+qtest_event_send(struct qtest_session *s, char *buf)
+{
+	int ret;
+
+	/* relay normal message to pipe */
+	ret = qtest_raw_send(s->msgfds.writefd, buf, strlen(buf));
+	if (ret < 0)
+		rte_panic("cannot relay normal message\n");
+}
+
+static void
+qtest_close_one_socket(int *fd)
+{
+	if (*fd > 0) {
+		close(*fd);
+		*fd = -1;
+	}
+}
+
+static void
+qtest_close_sockets(struct qtest_session *s)
+{
+	qtest_close_one_socket(&s->qtest_socket);
+	qtest_close_one_socket(&s->msgfds.readfd);
+	qtest_close_one_socket(&s->msgfds.writefd);
+}
+
+static void
+qtest_event_enqueue(struct qtest_session *s, char *buf)
+{
+	size_t len = strlen(buf);
+	char *dest;
+
+	if (s->evq == NULL) {
+		/* allocate one more byte for '\0' */
+		s->evq = malloc(len + 1);
+		if (s->evq == NULL)
+			rte_panic("Cannot allocate memory\n");
+
+		s->evq_dequeue_ptr = s->evq;
+		s->evq_total_len = len + 1;
+		dest = s->evq;
+	} else {
+		size_t offset = s->evq_dequeue_ptr - s->evq;
+
+		s->evq = realloc(s->evq, s->evq_total_len + len);
+		if (s->evq == NULL)
+			rte_panic("Cannot re-allocate memory\n");
+
+		s->evq_dequeue_ptr = s->evq + offset;
+		dest = s->evq + s->evq_total_len - 1;
+		s->evq_total_len += len;
+	}
+
+	strncpy(dest, buf, len);
+	dest[len] = '\0';
+}
+
+static char *
+qtest_event_dequeue(struct qtest_session *s)
+{
+	char *head, *next_head;
+
+	head = s->evq_dequeue_ptr;
+
+	/* make sure message is terminated by '\n' */
+	next_head = strchr(s->evq_dequeue_ptr, '\n');
+	if (next_head == NULL)
+		return NULL;
+
+	/* set next dequeue pointer */
+	s->evq_dequeue_ptr = next_head + 1;
+
+	return head;
+}
+
+static void
+qtest_event_flush(struct qtest_session *s)
+{
+	if (s->evq) {
+		free(s->evq);
+		s->evq = NULL;
+		s->evq_dequeue_ptr = NULL;
+		s->evq_total_len = 0;
+	}
+}
+
+/*
+ * This thread relays QTest response using pipe and eventfd.
+ * The function is needed because we need to separate IRQ message from others.
+ */
+static void *
+qtest_event_handler(void *data) {
+	struct qtest_session *s = (struct qtest_session *)data;
+	char buf[64];
+	char *p;
+	int ret;
+
+	for (;;) {
+		memset(buf, 0, sizeof(buf));
+		ret = qtest_raw_recv(s->qtest_socket, buf, sizeof(buf));
+		if (ret <= 0) {
+			PMD_DRV_LOG(EMERG,
+				"QTest connection was closed.\n"
+				"Please detach the port, then start QEMU "
+				"and attach the port again.\n");
+			qtest_close_sockets(s);
+			qtest_event_flush(s);
+			return NULL;
+		}
+
+		qtest_event_enqueue(s, buf);
+
+		/* in the case of incomplete message, receive again */
+		p = &buf[sizeof(buf) - 1];
+		if ((*p != '\0') && (*p != '\n'))
+			continue;
+
+		/* may receive multiple messages at the same time */
+		while ((p = qtest_event_dequeue(s)) != NULL)
+			qtest_event_send(s, p);
+
+		qtest_event_flush(s);
+	}
+	return NULL;
+}
+
+static int
+qtest_open_socket(char *path)
+{
+	struct sockaddr_un sa = {0};
+	int ret, fd, loop = 100;
+
+	fd = socket(AF_UNIX, SOCK_STREAM, 0);
+	if (fd < 0)
+		return -1;
+
+	sa.sun_family = AF_UNIX;
+	strncpy(sa.sun_path, path, sizeof(sa.sun_path));
+
+	while (loop--) {
+		/*
+		 * If QEMU has multiple sockets needed to be listen, QEMU needs
+		 * some time to start listening a next socket.
+		 * In our case, after connecting ivshmem socket, we may need to wait
+		 * a bit to connect to qtest socket.
+		 */
+		ret = connect(fd, (struct sockaddr *)&sa,
+				sizeof(struct sockaddr_un));
+		if (ret == 0)
+			break;
+		else
+			usleep(100000);
+	}
+
+	if (ret != 0) {
+		close(fd);
+		return -1;
+	}
+
+	return fd;
+}
+
+void
+qtest_vdev_uninit(struct qtest_session *s)
+{
+	qtest_close_sockets(s);
+	qtest_event_flush(s);
+
+	if (s->event_th_started) {
+		pthread_cancel(s->event_th);
+		pthread_join(s->event_th, NULL);
+		s->event_th_started = 0;
+	}
+
+	pthread_mutex_destroy(&s->qtest_session_lock);
+	rte_free(s);
+}
+
+struct qtest_session *
+qtest_vdev_init(char *qtest_path)
+{
+	struct qtest_session *s;
+	int ret;
+
+	s = rte_zmalloc(NULL, sizeof(*s), RTE_CACHE_LINE_SIZE);
+	if (s == NULL) {
+		PMD_DRV_LOG(ERR, "Failed to allocate memory");
+		return NULL;
+	}
+
+	ret = pthread_mutex_init(&s->qtest_session_lock, NULL);
+	if (ret != 0) {
+		PMD_DRV_LOG(ERR, "Failed to initialize mutex");
+		rte_free(s);
+		return NULL;
+	}
+
+	ret = pipe(s->msgfds.pipefd);
+	if (ret != 0) {
+		PMD_DRV_LOG(ERR, "Failed to initialize message pipe");
+		goto error;
+	}
+
+	s->qtest_socket = qtest_open_socket(qtest_path);
+	if (s->qtest_socket < 0) {
+		PMD_DRV_LOG(ERR, "Failed to open %s", qtest_path);
+		goto error;
+	}
+
+	ret = pthread_create(&s->event_th, NULL, qtest_event_handler, s);
+	if (ret != 0) {
+		PMD_DRV_LOG(ERR, "Failed to create event handler");
+		goto error;
+	}
+	s->event_th_started = 1;
+
+	return s;
+
+error:
+	qtest_vdev_uninit(s);
+	return NULL;
+}
diff --git a/drivers/net/virtio/virtio_qtest/qtest_utils.h b/drivers/net/virtio/virtio_qtest/qtest_utils.h
new file mode 100644
index 0000000..e39cde8
--- /dev/null
+++ b/drivers/net/virtio/virtio_qtest/qtest_utils.h
@@ -0,0 +1,119 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2016 IGEL Co., Ltd. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of IGEL Co., Ltd. nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _QTEST_UTILS_H_
+#define _QTEST_UTILS_H_
+
+/**
+ * @internal
+ * Initialization function of QTest utility.
+ *
+ * @param qtest_path
+ *   Path of qtest socket.
+ * @return
+ *   The pointer to qtest session structure.
+ */
+struct qtest_session *qtest_vdev_init(char *qtest_path);
+
+/**
+ * @internal
+ * Finalization function of QTest utility.
+ *
+ * @param s
+ *   The pointer to qtest session structure.
+ */
+void qtest_vdev_uninit(struct qtest_session *s);
+
+/**
+ * @internal
+ * Read a port of QEMU guest.
+ *
+ * @param s
+ *   The pointer to qtest session structure.
+ * @param addr
+ *   The port address.
+ * @param type
+ *   Size of port. Specify one of 'l', 'w', and 'b'.
+ * @return
+ *   Value read from the port.
+ */
+uint32_t qtest_in(struct qtest_session *s, uint16_t addr, char type);
+
+/**
+ * @internal
+ * Write a port of QEMU guest.
+ *
+ * @param s
+ *   The pointer to qtest session structure.
+ * @param addr
+ *   The port address.
+ * @param val
+ *   Written value.
+ * @param type
+ *   Size of port. Specify one of 'l', 'w', and 'b'.
+ */
+void qtest_out(struct qtest_session *s, uint16_t addr,
+			uint64_t val, char type);
+
+/**
+ * @internal
+ * Read memory of QEMU guest.
+ *
+ * @param s
+ *   The pointer to qtest session structure.
+ * @param addr
+ *   The memory address.
+ * @param type
+ *   Size of port. Specify one of 'l', 'w', and 'b'.
+ * @return
+ *   Value read from the memory.
+ */
+uint32_t qtest_read(struct qtest_session *s, uint64_t addr, char type);
+
+/**
+ * @internal
+ * Write memory of QEMU guest.
+ *
+ * @param s
+ *   The pointer to qtest session structure.
+ * @param addr
+ *   The memory address.
+ * @param val
+ *   Written value.
+ * @param type
+ *   Size of memory. Specify one of 'l', 'w', and 'b'.
+ */
+void qtest_write(struct qtest_session *s, uint64_t addr,
+			uint64_t val, char type);
+
+#endif /* _QTEST_UTILS_H_ */
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 120+ messages in thread

* [PATCH v5 2/6] virtio, qtest: Add pci device initialization function to qtest utils
  2016-03-09  8:33         ` [PATCH v4 11/12] virtio: Add QTest support for virtio-net PMD Tetsuya Mukawa
  2016-06-02  3:29           ` [PATCH v5 0/6] Virtio-net PMD: QEMU QTest extension for container Tetsuya Mukawa
  2016-06-02  3:29           ` [PATCH v5 1/6] virtio, qtest: Add QTest utility basic functions Tetsuya Mukawa
@ 2016-06-02  3:29           ` Tetsuya Mukawa
  2016-06-02  3:29           ` [PATCH v5 3/6] virtio, qtest: Add functionality to share memory between QTest guest Tetsuya Mukawa
                             ` (6 subsequent siblings)
  9 siblings, 0 replies; 120+ messages in thread
From: Tetsuya Mukawa @ 2016-06-02  3:29 UTC (permalink / raw)
  To: dev; +Cc: yuanhan.liu, jianfeng.tan, huawei.xie, Tetsuya Mukawa

The patch adds general pci device initialization functionality to
qtest utils. It initializes pci devices using qtest messaging.

Signed-off-by: Tetsuya Mukawa <mukawa@igel.co.jp>
---
 drivers/net/virtio/virtio_qtest/qtest_utils.c | 349 +++++++++++++++++++++++++-
 drivers/net/virtio/virtio_qtest/qtest_utils.h | 114 ++++++++-
 2 files changed, 461 insertions(+), 2 deletions(-)

diff --git a/drivers/net/virtio/virtio_qtest/qtest_utils.c b/drivers/net/virtio/virtio_qtest/qtest_utils.c
index 3ad8f9e..2c088f0 100644
--- a/drivers/net/virtio/virtio_qtest/qtest_utils.c
+++ b/drivers/net/virtio/virtio_qtest/qtest_utils.c
@@ -43,6 +43,10 @@
 #include "../virtio_ethdev.h"
 #include "qtest_utils.h"
 
+#define PCI_CONFIG_ADDR(_bus, _device, _function, _offset) ( \
+	(1 << 31) | ((_bus) & 0xff) << 16 | ((_device) & 0x1f) << 11 | \
+	((_function) & 0x7) << 8 | ((_offset) & 0xfc))
+
 union qtest_pipefds {
 	struct {
 		int pipefd[2];
@@ -57,6 +61,8 @@ struct qtest_session {
 	int qtest_socket;
 	pthread_mutex_t qtest_session_lock;
 
+	struct qtest_pci_device_list head;
+
 	pthread_t event_th;
 	int event_th_started;
 	char *evq;
@@ -195,6 +201,119 @@ qtest_raw_write(struct qtest_session *s, uint64_t addr, uint32_t val, char type)
 }
 
 /*
+ * qtest_pci_inX/outX are used for accessing PCI configuration space.
+ * The functions are implemented based on PCI configuration space
+ * specification.
+ * Accroding to the spec, access size of read()/write() should be 4 bytes.
+ */
+static int
+qtest_pci_inb(struct qtest_session *s, uint8_t bus, uint8_t device,
+		uint8_t function, uint8_t offset)
+{
+	uint32_t tmp;
+
+	tmp = PCI_CONFIG_ADDR(bus, device, function, offset);
+
+	if (pthread_mutex_lock(&s->qtest_session_lock) < 0)
+		rte_panic("Cannot lock mutex\n");
+
+	qtest_raw_out(s, 0xcf8, tmp, 'l');
+	tmp = qtest_raw_in(s, 0xcfc, 'l');
+
+	if (pthread_mutex_unlock(&s->qtest_session_lock) < 0)
+		rte_panic("Cannot unlock mutex\n");
+
+	return (tmp >> ((offset & 0x3) * 8)) & 0xff;
+}
+
+static uint32_t
+qtest_pci_inl(struct qtest_session *s, uint8_t bus, uint8_t device,
+		uint8_t function, uint8_t offset)
+{
+	uint32_t tmp;
+
+	tmp = PCI_CONFIG_ADDR(bus, device, function, offset);
+
+	if (pthread_mutex_lock(&s->qtest_session_lock) < 0)
+		rte_panic("Cannot lock mutex\n");
+
+	qtest_raw_out(s, 0xcf8, tmp, 'l');
+	tmp = qtest_raw_in(s, 0xcfc, 'l');
+
+	if (pthread_mutex_unlock(&s->qtest_session_lock) < 0)
+		rte_panic("Cannot unlock mutex\n");
+
+	return tmp;
+}
+
+static void
+qtest_pci_outl(struct qtest_session *s, uint8_t bus, uint8_t device,
+		uint8_t function, uint8_t offset, uint32_t value)
+{
+	uint32_t tmp;
+
+	tmp = PCI_CONFIG_ADDR(bus, device, function, offset);
+
+	if (pthread_mutex_lock(&s->qtest_session_lock) < 0)
+		rte_panic("Cannot lock mutex\n");
+
+	qtest_raw_out(s, 0xcf8, tmp, 'l');
+	qtest_raw_out(s, 0xcfc, value, 'l');
+
+	if (pthread_mutex_unlock(&s->qtest_session_lock) < 0)
+		rte_panic("Cannot unlock mutex\n");
+}
+
+static uint64_t
+qtest_pci_inq(struct qtest_session *s, uint8_t bus, uint8_t device,
+		uint8_t function, uint8_t offset)
+{
+	uint32_t tmp;
+	uint64_t val;
+
+	tmp = PCI_CONFIG_ADDR(bus, device, function, offset);
+
+	if (pthread_mutex_lock(&s->qtest_session_lock) < 0)
+		rte_panic("Cannot lock mutex\n");
+
+	qtest_raw_out(s, 0xcf8, tmp, 'l');
+	val = (uint64_t)qtest_raw_in(s, 0xcfc, 'l');
+
+	tmp = PCI_CONFIG_ADDR(bus, device, function, offset + 4);
+
+	qtest_raw_out(s, 0xcf8, tmp, 'l');
+	val |= (uint64_t)qtest_raw_in(s, 0xcfc, 'l') << 32;
+
+	if (pthread_mutex_unlock(&s->qtest_session_lock) < 0)
+		rte_panic("Cannot unlock mutex\n");
+
+	return val;
+}
+
+static void
+qtest_pci_outq(struct qtest_session *s, uint8_t bus, uint8_t device,
+		uint8_t function, uint8_t offset, uint64_t value)
+{
+	uint32_t tmp;
+
+	tmp = PCI_CONFIG_ADDR(bus, device, function, offset);
+
+	if (pthread_mutex_lock(&s->qtest_session_lock) < 0)
+		rte_panic("Cannot lock mutex\n");
+
+	qtest_raw_out(s, 0xcf8, tmp, 'l');
+	qtest_raw_out(s, 0xcfc, (uint32_t)(value & 0xffffffff), 'l');
+
+	tmp = PCI_CONFIG_ADDR(bus, device, function, offset + 4);
+
+	qtest_raw_out(s, 0xcf8, tmp, 'l');
+	qtest_raw_out(s, 0xcfc, (uint32_t)(value >> 32), 'l');
+
+	if (pthread_mutex_unlock(&s->qtest_session_lock) < 0)
+		rte_panic("Cannot unlock mutex\n");
+}
+
+/*
  * qtest_in/out are used for accessing ioport of qemu guest.
  * qtest_read/write are used for accessing memory of qemu guest.
  */
@@ -254,6 +373,18 @@ qtest_write(struct qtest_session *s, uint64_t addr, uint64_t val, char type)
 		rte_panic("Cannot lock mutex\n");
 }
 
+static struct qtest_pci_device *
+qtest_find_device(struct qtest_session *s, const char *name)
+{
+	struct qtest_pci_device *dev;
+
+	TAILQ_FOREACH(dev, &s->head, next) {
+		if (strcmp(dev->name, name) == 0)
+			return dev;
+	}
+	return NULL;
+}
+
 static void
 qtest_event_send(struct qtest_session *s, char *buf)
 {
@@ -382,6 +513,208 @@ qtest_event_handler(void *data) {
 	return NULL;
 }
 
+/*
+ * Common initialization of PCI device.
+ * To know detail, see pci specification.
+ */
+int
+qtest_init_pci_device(struct qtest_session *s, struct qtest_pci_device *dev)
+{
+	uint8_t i, bus, device;
+	uint32_t val;
+	uint64_t val64;
+
+	bus = dev->bus_addr;
+	device = dev->device_addr;
+
+	PMD_DRV_LOG(INFO,
+		"Find %s on virtual PCI bus: %04x:%02x:00.0",
+		dev->name, bus, device);
+
+	/* Check header type */
+	val = qtest_pci_inb(s, bus, device, 0, PCI_HEADER_TYPE);
+	if (val != PCI_HEADER_TYPE_NORMAL) {
+		PMD_DRV_LOG(ERR, "Unexpected header type %d", val);
+		return -1;
+	}
+
+	/* Check BAR type */
+	for (i = 0; i < NB_BAR; i++) {
+		val = qtest_pci_inl(s, bus, device, 0, dev->bar[i].addr);
+
+		switch (dev->bar[i].type) {
+		case QTEST_PCI_BAR_IO:
+			if ((val & 0x1) != PCI_BASE_ADDRESS_SPACE_IO)
+				dev->bar[i].type = QTEST_PCI_BAR_DISABLE;
+			break;
+		case QTEST_PCI_BAR_MEMORY_UNDER_1MB:
+			if ((val & 0x1) != PCI_BASE_ADDRESS_SPACE_MEMORY)
+				dev->bar[i].type = QTEST_PCI_BAR_DISABLE;
+			if ((val & 0x6) != PCI_BASE_ADDRESS_MEM_TYPE_1M)
+				dev->bar[i].type = QTEST_PCI_BAR_DISABLE;
+			break;
+		case QTEST_PCI_BAR_MEMORY_32:
+			if ((val & 0x1) != PCI_BASE_ADDRESS_SPACE_MEMORY)
+				dev->bar[i].type = QTEST_PCI_BAR_DISABLE;
+			if ((val & 0x6) != PCI_BASE_ADDRESS_MEM_TYPE_32)
+				dev->bar[i].type = QTEST_PCI_BAR_DISABLE;
+			break;
+		case QTEST_PCI_BAR_MEMORY_64:
+
+			if ((val & 0x1) != PCI_BASE_ADDRESS_SPACE_MEMORY)
+				dev->bar[i].type = QTEST_PCI_BAR_DISABLE;
+			if ((val & 0x6) != PCI_BASE_ADDRESS_MEM_TYPE_64)
+				dev->bar[i].type = QTEST_PCI_BAR_DISABLE;
+			break;
+		case QTEST_PCI_BAR_DISABLE:
+			break;
+		}
+	}
+
+	/* Enable device */
+	val = qtest_pci_inl(s, bus, device, 0, PCI_COMMAND);
+	val |= PCI_COMMAND_IO | PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER;
+	qtest_pci_outl(s, bus, device, 0, PCI_COMMAND, val);
+
+	/* Calculate BAR size */
+	for (i = 0; i < NB_BAR; i++) {
+		switch (dev->bar[i].type) {
+		case QTEST_PCI_BAR_IO:
+		case QTEST_PCI_BAR_MEMORY_UNDER_1MB:
+		case QTEST_PCI_BAR_MEMORY_32:
+			qtest_pci_outl(s, bus, device, 0,
+					dev->bar[i].addr, 0xffffffff);
+			val = qtest_pci_inl(s, bus, device,
+					0, dev->bar[i].addr);
+			dev->bar[i].region_size = ~(val & 0xfffffff0) + 1;
+			break;
+		case QTEST_PCI_BAR_MEMORY_64:
+			qtest_pci_outq(s, bus, device, 0,
+					dev->bar[i].addr, 0xffffffffffffffff);
+			val64 = qtest_pci_inq(s, bus, device,
+					0, dev->bar[i].addr);
+			dev->bar[i].region_size =
+					~(val64 & 0xfffffffffffffff0) + 1;
+			break;
+		case QTEST_PCI_BAR_DISABLE:
+			break;
+		}
+	}
+
+	/* Set BAR region */
+	for (i = 0; i < NB_BAR; i++) {
+		switch (dev->bar[i].type) {
+		case QTEST_PCI_BAR_IO:
+		case QTEST_PCI_BAR_MEMORY_UNDER_1MB:
+		case QTEST_PCI_BAR_MEMORY_32:
+			qtest_pci_outl(s, bus, device, 0, dev->bar[i].addr,
+				dev->bar[i].region_start);
+			PMD_DRV_LOG(INFO, "Set BAR of %s device: 0x%lx - 0x%lx",
+				dev->name, dev->bar[i].region_start,
+				dev->bar[i].region_start + dev->bar[i].region_size);
+			break;
+		case QTEST_PCI_BAR_MEMORY_64:
+			qtest_pci_outq(s, bus, device, 0, dev->bar[i].addr,
+				dev->bar[i].region_start);
+			PMD_DRV_LOG(INFO, "Set BAR of %s device: 0x%lx - 0x%lx",
+				dev->name, dev->bar[i].region_start,
+				dev->bar[i].region_start + dev->bar[i].region_size);
+			break;
+		case QTEST_PCI_BAR_DISABLE:
+			break;
+		}
+	}
+
+	return 0;
+}
+
+static int
+qtest_find_pci_device(struct qtest_session *s, const char *name)
+{
+	struct qtest_pci_device *dev;
+	struct rte_pci_addr *addr;
+	uint32_t val;
+
+	dev = qtest_find_device(s, name);
+	if (dev == NULL)
+		goto not_found;
+
+	addr = &dev->specified_addr;
+	PMD_DRV_LOG(INFO, "PCI address of %s is %04x:%02x:%02x.%02x", name,
+			addr->domain, addr->bus, addr->devid, addr->function);
+
+	val = qtest_pci_inl(s, addr->bus, addr->devid, addr->function, 0);
+	if (val == ((uint32_t)dev->device_id << 16 | dev->vendor_id)) {
+		dev->bus_addr = addr->bus;
+		dev->device_addr = addr->devid;
+		return 0;
+	}
+
+not_found:
+	PMD_DRV_LOG(ERR, "%s isn' found", name);
+	return -1;
+}
+
+static int
+qtest_init_pci_devices(struct qtest_session *s,
+		struct qtest_pci_device *devices, int devnum)
+{
+	struct qtest_pci_device *dev;
+	int i, ret;
+
+
+	/* Try to find devices */
+	for (i = 0; i < devnum; i++) {
+		ret = qtest_find_pci_device(s, devices[i].name);
+		if (ret < 0)
+			return -1;
+	}
+
+	/* Initialize devices */
+	TAILQ_FOREACH(dev, &s->head, next) {
+		ret = dev->init(s, dev);
+		if (ret != 0)
+			return ret;
+	}
+
+	return 0;
+}
+
+static void
+qtest_remove_target_devices(struct qtest_session *s)
+{
+	struct qtest_pci_device *dev, *next;
+
+	for (dev = TAILQ_FIRST(&s->head); dev != NULL; dev = next) {
+		next = TAILQ_NEXT(dev, next);
+		TAILQ_REMOVE(&s->head, dev, next);
+		free(dev);
+	}
+}
+
+static int
+qtest_register_target_devices(struct qtest_session *s,
+		struct qtest_pci_device *devices, int devnum)
+{
+	struct qtest_pci_device *device;
+	int i;
+
+	TAILQ_INIT(&s->head);
+
+	for (i = 0; i < devnum; i++) {
+		device = malloc(sizeof(*device));
+		if (device == NULL) {
+			qtest_remove_target_devices(s);
+			return -1;
+		}
+
+		*device = devices[i];
+		TAILQ_INSERT_TAIL(&s->head, device, next);
+	}
+
+	return 0;
+}
+
 static int
 qtest_open_socket(char *path)
 {
@@ -431,11 +764,13 @@ qtest_vdev_uninit(struct qtest_session *s)
 	}
 
 	pthread_mutex_destroy(&s->qtest_session_lock);
+	qtest_remove_target_devices(s);
 	rte_free(s);
 }
 
 struct qtest_session *
-qtest_vdev_init(char *qtest_path)
+qtest_vdev_init(char *qtest_path,
+		struct qtest_pci_device *devices, int devnum)
 {
 	struct qtest_session *s;
 	int ret;
@@ -459,6 +794,12 @@ qtest_vdev_init(char *qtest_path)
 		goto error;
 	}
 
+	ret = qtest_register_target_devices(s, devices, devnum);
+	if (ret != 0) {
+		PMD_DRV_LOG(ERR, "Failed to initialize qtest session\n");
+		goto error;
+	}
+
 	s->qtest_socket = qtest_open_socket(qtest_path);
 	if (s->qtest_socket < 0) {
 		PMD_DRV_LOG(ERR, "Failed to open %s", qtest_path);
@@ -472,6 +813,12 @@ qtest_vdev_init(char *qtest_path)
 	}
 	s->event_th_started = 1;
 
+	ret = qtest_init_pci_devices(s, devices, devnum);
+	if (ret != 0) {
+		PMD_DRV_LOG(ERR, "Failed to initialize devices\n");
+		goto error;
+	}
+
 	return s;
 
 error:
diff --git a/drivers/net/virtio/virtio_qtest/qtest_utils.h b/drivers/net/virtio/virtio_qtest/qtest_utils.h
index e39cde8..a3d8176 100644
--- a/drivers/net/virtio/virtio_qtest/qtest_utils.h
+++ b/drivers/net/virtio/virtio_qtest/qtest_utils.h
@@ -34,16 +34,114 @@
 #ifndef _QTEST_UTILS_H_
 #define _QTEST_UTILS_H_
 
+#include <sys/queue.h>
+#include <linux/pci_regs.h>
+
+#define NB_BAR                          6
+
+/*
+ * QTest utilities
+ *
+ * This utility assumes QTest guest will have below 3 pci devices.
+ * - piix3
+ *    It will be used for enabling interrupts from target device.
+ * - ivshmme
+ *    It will be used for enabling shared memory between guest and DPDK PMD.
+ * - target device
+ *    It will be the device DPDK PMD wants to use.
+ *    So far, virtio-net device is the only use case.
+ *
+ * To use the utilities, DPDK PMD needs to define above device information.
+ * Then call qtest_vdev_init().
+ * To handle multiple target devices in one QEMU guest, piix3 handling should
+ * be changed.
+ */
+
+enum qtest_pci_bar_type {
+	QTEST_PCI_BAR_DISABLE = 0,
+	QTEST_PCI_BAR_IO,
+	QTEST_PCI_BAR_MEMORY_UNDER_1MB,
+	QTEST_PCI_BAR_MEMORY_32,
+	QTEST_PCI_BAR_MEMORY_64
+};
+
+/*
+ * A structure used to specify BAR information.
+ *
+ * - type
+ *    Specify type of this device.
+ * - addr
+ *    Specify one of PCI_BASE_ADDRESS_0/../5.
+ * - region_start
+ *    Specify physical address of this device. Because a guest cpu will access
+ *    this device using the address, this address should not be over lapped by
+ *    others.
+ * - region_size
+ *    Will be filled by QTest utility while initializing the device.
+ */
+struct qtest_pci_bar {
+	enum qtest_pci_bar_type type;
+	uint8_t addr;
+	uint64_t region_start;
+	uint64_t region_size;
+};
+
+struct qtest_session;
+
+/*
+ * A structure used to specify pci device information.
+ *
+ * - name
+ *    Specify name of this device.
+ * - device_id
+ *    Specify device id of this device.
+ * - vendor_id
+ *    Specify vendor id of this device.
+ * - bus_addr
+ *    Will be filled by QTest utility.
+ *    It will be bus address of this device.
+ * - device_addr
+ *    Will be filled by QTest utility.
+ *    It will be device address of this device.
+ * - bar
+ *    Specify bar structure for this device.
+ * - specified_addr
+ *    Specify pci address of this device.
+ *    QTest utility will not check any other pci address for this device.
+ *    If it's wrong, device initialization will be failed.
+ * - init
+ *   Specify initialization function.
+ *   If the device is generic device, just specify qtest_init_pci_device().
+ */
+TAILQ_HEAD(qtest_pci_device_list, qtest_pci_device);
+struct qtest_pci_device {
+	TAILQ_ENTRY(qtest_pci_device) next;
+	const char *name;
+	uint16_t device_id;
+	uint16_t vendor_id;
+	uint8_t bus_addr;
+	uint8_t device_addr;
+	struct qtest_pci_bar bar[NB_BAR];
+	struct rte_pci_addr specified_addr;
+	int (*init)(struct qtest_session *s, struct qtest_pci_device *dev);
+};
+
 /**
  * @internal
  * Initialization function of QTest utility.
  *
  * @param qtest_path
  *   Path of qtest socket.
+ * @param devices
+ *   Array of device information. It should contain piix3, ivshmem and target
+ *   device(virtio-net device).
+ * @param devnum
+ *   The number of device information.
  * @return
  *   The pointer to qtest session structure.
  */
-struct qtest_session *qtest_vdev_init(char *qtest_path);
+struct qtest_session *qtest_vdev_init(char *qtest_path,
+		struct qtest_pci_device *devices, int devnum);
 
 /**
  * @internal
@@ -116,4 +214,18 @@ uint32_t qtest_read(struct qtest_session *s, uint64_t addr, char type);
 void qtest_write(struct qtest_session *s, uint64_t addr,
 			uint64_t val, char type);
 
+/**
+ * @internal
+ * Initialization function of general device.
+ *
+ * @param s
+ *   The pointer to qtest session structure.
+ * @param dev
+ *   The pointer of pci device.
+ * @return
+ *   0 on success, negative on error
+ */
+int qtest_init_pci_device(struct qtest_session *s,
+			struct qtest_pci_device *dev);
+
 #endif /* _QTEST_UTILS_H_ */
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 120+ messages in thread

* [PATCH v5 3/6] virtio, qtest: Add functionality to share memory between QTest guest
  2016-03-09  8:33         ` [PATCH v4 11/12] virtio: Add QTest support for virtio-net PMD Tetsuya Mukawa
                             ` (2 preceding siblings ...)
  2016-06-02  3:29           ` [PATCH v5 2/6] virtio, qtest: Add pci device initialization function to qtest utils Tetsuya Mukawa
@ 2016-06-02  3:29           ` Tetsuya Mukawa
  2016-06-02  3:29           ` [PATCH v5 4/6] virtio, qtest: Add misc functions to handle pci information Tetsuya Mukawa
                             ` (5 subsequent siblings)
  9 siblings, 0 replies; 120+ messages in thread
From: Tetsuya Mukawa @ 2016-06-02  3:29 UTC (permalink / raw)
  To: dev; +Cc: yuanhan.liu, jianfeng.tan, huawei.xie, Tetsuya Mukawa

The patch adds functionality to share memory between QTest guest and
DPDK application using ivshmem device.
The shared memory will be all EAL memory on hugepages. This memory will
be accessed by QEMU vcpu and DPDK application using same address.

Signed-off-by: Tetsuya Mukawa <mukawa@igel.co.jp>
---
 drivers/net/virtio/virtio_qtest/qtest_utils.c | 189 +++++++++++++++++++++++++-
 drivers/net/virtio/virtio_qtest/qtest_utils.h |   4 +-
 2 files changed, 189 insertions(+), 4 deletions(-)

diff --git a/drivers/net/virtio/virtio_qtest/qtest_utils.c b/drivers/net/virtio/virtio_qtest/qtest_utils.c
index 2c088f0..9bc1fca 100644
--- a/drivers/net/virtio/virtio_qtest/qtest_utils.c
+++ b/drivers/net/virtio/virtio_qtest/qtest_utils.c
@@ -43,6 +43,9 @@
 #include "../virtio_ethdev.h"
 #include "qtest_utils.h"
 
+/* ivshmem configuration */
+#define IVSHMEM_PROTOCOL_VERSION        0
+
 #define PCI_CONFIG_ADDR(_bus, _device, _function, _offset) ( \
 	(1 << 31) | ((_bus) & 0xff) << 16 | ((_device) & 0x1f) << 11 | \
 	((_function) & 0x7) << 8 | ((_offset) & 0xfc))
@@ -59,6 +62,7 @@ union qtest_pipefds {
 
 struct qtest_session {
 	int qtest_socket;
+	int ivshmem_socket;
 	pthread_mutex_t qtest_session_lock;
 
 	struct qtest_pci_device_list head;
@@ -411,6 +415,7 @@ qtest_close_sockets(struct qtest_session *s)
 	qtest_close_one_socket(&s->qtest_socket);
 	qtest_close_one_socket(&s->msgfds.readfd);
 	qtest_close_one_socket(&s->msgfds.writefd);
+	qtest_close_one_socket(&s->ivshmem_socket);
 }
 
 static void
@@ -716,6 +721,172 @@ qtest_register_target_devices(struct qtest_session *s,
 }
 
 static int
+qtest_send_message_to_ivshmem(int sock_fd, uint64_t client_id, int shm_fd)
+{
+	struct iovec iov;
+	struct msghdr msgh;
+	size_t fdsize = sizeof(int);
+	char control[CMSG_SPACE(fdsize)];
+	struct cmsghdr *cmsg;
+	int ret;
+
+	memset(&msgh, 0, sizeof(msgh));
+	iov.iov_base = &client_id;
+	iov.iov_len = sizeof(client_id);
+
+	msgh.msg_iov = &iov;
+	msgh.msg_iovlen = 1;
+
+	if (shm_fd >= 0) {
+		msgh.msg_control = &control;
+		msgh.msg_controllen = sizeof(control);
+		cmsg = CMSG_FIRSTHDR(&msgh);
+		cmsg->cmsg_len = CMSG_LEN(fdsize);
+		cmsg->cmsg_level = SOL_SOCKET;
+		cmsg->cmsg_type = SCM_RIGHTS;
+		memcpy(CMSG_DATA(cmsg), &shm_fd, fdsize);
+	}
+
+	do {
+		ret = sendmsg(sock_fd, &msgh, 0);
+	} while (ret < 0 && errno == EINTR);
+
+	if (ret < 0) {
+		PMD_DRV_LOG(ERR, "sendmsg error");
+		return ret;
+	}
+
+	return ret;
+}
+
+/* This function is came from ../virtio_user/vhost_user.c
+ *
+ * Two possible options:
+ * 1. Match HUGEPAGE_INFO_FMT to find the file storing struct hugepage_file
+ * array. This is simple but cannot be used in secondary process because
+ * secondary process will close and munmap that file.
+ * 2. Match HUGEFILE_FMT to find hugepage files directly.
+ *
+ * We choose option 2.
+ */
+struct hugepage_file_info {
+	uint64_t addr;            /**< virtual addr */
+	size_t   size;            /**< the file size */
+	char     path[PATH_MAX];  /**< path to backing file */
+};
+
+static int
+get_hugepage_file_info(struct hugepage_file_info huges[], int max)
+{
+	int idx;
+	FILE *f;
+	char buf[BUFSIZ], *tmp, *tail;
+	char *str_underline, *str_start;
+	int huge_index;
+	uint64_t v_start, v_end;
+
+	f = fopen("/proc/self/maps", "r");
+	if (!f) {
+		PMD_DRV_LOG(ERR, "cannot open /proc/self/maps");
+		return -1;
+	}
+
+	idx = 0;
+	while (fgets(buf, sizeof(buf), f) != NULL) {
+		sscanf(buf, "%" PRIx64 "-%" PRIx64, &v_start, &v_end);
+
+		tmp = strchr(buf, ' ') + 1; /** skip address */
+		tmp = strchr(tmp, ' ') + 1; /** skip perm */
+		tmp = strchr(tmp, ' ') + 1; /** skip offset */
+		tmp = strchr(tmp, ' ') + 1; /** skip dev */
+		tmp = strchr(tmp, ' ') + 1; /** skip inode */
+		while (*tmp == ' ')         /** skip spaces */
+			tmp++;
+		tail = strrchr(tmp, '\n');  /** remove newline if exists */
+		if (tail)
+			*tail = '\0';
+
+		/* Match HUGEFILE_FMT, aka "%s/%smap_%d",
+		 * which is defined in eal_filesystem.h
+		 */
+		str_underline = strrchr(tmp, '_');
+		if (!str_underline)
+			continue;
+
+		str_start = str_underline - strlen("map");
+		if (str_start < tmp)
+			continue;
+
+		if (sscanf(str_start, "map_%d", &huge_index) != 1)
+			continue;
+
+		if (idx >= max) {
+			PMD_DRV_LOG(ERR, "Exceed maximum of %d", max);
+			goto error;
+		}
+		huges[idx].addr = v_start;
+		huges[idx].size = v_end - v_start;
+		strcpy(huges[idx].path, tmp);
+		idx++;
+	}
+
+	fclose(f);
+	return idx;
+
+error:
+	fclose(f);
+	return -1;
+}
+
+static int
+qtest_setup_shared_memory(struct qtest_session *s)
+{
+	int shm_fd, num, ret;
+	struct hugepage_file_info huges[1];
+
+	num = get_hugepage_file_info(huges, 1);
+	if (num != 1) {
+		PMD_DRV_LOG(ERR,
+			"Not supported memory configuration");
+		return -1;
+	}
+
+	shm_fd = open(huges[0].path, O_RDWR);
+	if (shm_fd < 0) {
+		PMD_DRV_LOG(ERR,
+			"Cannot open file: %s", huges[0].path);
+		return -1;
+	}
+
+	/* send our protocol version first */
+	ret = qtest_send_message_to_ivshmem(s->ivshmem_socket,
+			IVSHMEM_PROTOCOL_VERSION, -1);
+	if (ret < 0) {
+		PMD_DRV_LOG(ERR,
+			"Failed to send protocol version to ivshmem");
+		return -1;
+	}
+
+	/* send client id */
+	ret = qtest_send_message_to_ivshmem(s->ivshmem_socket, 0, -1);
+	if (ret < 0) {
+		PMD_DRV_LOG(ERR, "Failed to send VMID to ivshmem");
+		return -1;
+	}
+
+	/* send message to ivshmem */
+	ret = qtest_send_message_to_ivshmem(s->ivshmem_socket, -1, shm_fd);
+	if (ret < 0) {
+		PMD_DRV_LOG(ERR, "Failed to file descriptor to ivshmem");
+		return -1;
+	}
+
+	close(shm_fd);
+
+	return 0;
+}
+
+static int
 qtest_open_socket(char *path)
 {
 	struct sockaddr_un sa = {0};
@@ -769,7 +940,7 @@ qtest_vdev_uninit(struct qtest_session *s)
 }
 
 struct qtest_session *
-qtest_vdev_init(char *qtest_path,
+qtest_vdev_init(char *qtest_path, char *ivshmem_path,
 		struct qtest_pci_device *devices, int devnum)
 {
 	struct qtest_session *s;
@@ -796,7 +967,13 @@ qtest_vdev_init(char *qtest_path,
 
 	ret = qtest_register_target_devices(s, devices, devnum);
 	if (ret != 0) {
-		PMD_DRV_LOG(ERR, "Failed to initialize qtest session\n");
+		PMD_DRV_LOG(ERR, "Failed to initialize qtest session");
+		goto error;
+	}
+
+	s->ivshmem_socket = qtest_open_socket(ivshmem_path);
+	if (s->ivshmem_socket < 0) {
+		PMD_DRV_LOG(ERR, "Failed to open %s", ivshmem_path);
 		goto error;
 	}
 
@@ -813,9 +990,15 @@ qtest_vdev_init(char *qtest_path,
 	}
 	s->event_th_started = 1;
 
+	ret = qtest_setup_shared_memory(s);
+	if (ret != 0) {
+		PMD_DRV_LOG(ERR, "Failed to setup shared memory");
+		goto error;
+	}
+
 	ret = qtest_init_pci_devices(s, devices, devnum);
 	if (ret != 0) {
-		PMD_DRV_LOG(ERR, "Failed to initialize devices\n");
+		PMD_DRV_LOG(ERR, "Failed to initialize devices");
 		goto error;
 	}
 
diff --git a/drivers/net/virtio/virtio_qtest/qtest_utils.h b/drivers/net/virtio/virtio_qtest/qtest_utils.h
index a3d8176..6c70552 100644
--- a/drivers/net/virtio/virtio_qtest/qtest_utils.h
+++ b/drivers/net/virtio/virtio_qtest/qtest_utils.h
@@ -132,6 +132,8 @@ struct qtest_pci_device {
  *
  * @param qtest_path
  *   Path of qtest socket.
+ * @param ivshmem_path
+ *   Path of ivshmem socket.
  * @param devices
  *   Array of device information. It should contain piix3, ivshmem and target
  *   device(virtio-net device).
@@ -140,7 +142,7 @@ struct qtest_pci_device {
  * @return
  *   The pointer to qtest session structure.
  */
-struct qtest_session *qtest_vdev_init(char *qtest_path,
+struct qtest_session *qtest_vdev_init(char *qtest_path, char *ivshmem_path,
 		struct qtest_pci_device *devices, int devnum);
 
 /**
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 120+ messages in thread

* [PATCH v5 4/6] virtio, qtest: Add misc functions to handle pci information
  2016-03-09  8:33         ` [PATCH v4 11/12] virtio: Add QTest support for virtio-net PMD Tetsuya Mukawa
                             ` (3 preceding siblings ...)
  2016-06-02  3:29           ` [PATCH v5 3/6] virtio, qtest: Add functionality to share memory between QTest guest Tetsuya Mukawa
@ 2016-06-02  3:29           ` Tetsuya Mukawa
  2016-06-02  3:29           ` [PATCH v5 5/6] virtio: Add QTest support to vtpci abstraction Tetsuya Mukawa
                             ` (4 subsequent siblings)
  9 siblings, 0 replies; 120+ messages in thread
From: Tetsuya Mukawa @ 2016-06-02  3:29 UTC (permalink / raw)
  To: dev; +Cc: yuanhan.liu, jianfeng.tan, huawei.xie, Tetsuya Mukawa

The patch adds below functions.
 - qtest_read_pci_cfg
 - qtest_get_bar_addr
 - qtest_get_bar_size
These are used for handling pci device information.
It will be called by later patches.

Signed-off-by: Tetsuya Mukawa <mukawa@igel.co.jp>
---
 drivers/net/virtio/virtio_qtest/qtest_utils.c | 77 +++++++++++++++++++++++++++
 drivers/net/virtio/virtio_qtest/qtest_utils.h | 56 +++++++++++++++++++
 2 files changed, 133 insertions(+)

diff --git a/drivers/net/virtio/virtio_qtest/qtest_utils.c b/drivers/net/virtio/virtio_qtest/qtest_utils.c
index 9bc1fca..27118fb 100644
--- a/drivers/net/virtio/virtio_qtest/qtest_utils.c
+++ b/drivers/net/virtio/virtio_qtest/qtest_utils.c
@@ -389,6 +389,83 @@ qtest_find_device(struct qtest_session *s, const char *name)
 	return NULL;
 }
 
+/*
+ * The function is used for reading pci configuration space of specifed device.
+ */
+int
+qtest_read_pci_cfg(struct qtest_session *s, const char *name,
+		void *buf, size_t len, off_t offset)
+{
+	struct qtest_pci_device *dev;
+	uint32_t i;
+	uint8_t *p = buf;
+
+	dev = qtest_find_device(s, name);
+	if (dev == NULL) {
+		PMD_DRV_LOG(ERR, "Cannot find specified device: %s", name);
+		return -1;
+	}
+
+	for (i = 0; i < len; i++) {
+		*(p + i) = qtest_pci_inb(s,
+				dev->bus_addr, dev->device_addr, 0, offset + i);
+	}
+
+	return 0;
+}
+
+static struct qtest_pci_bar *
+qtest_get_bar(struct qtest_session *s, const char *name, uint8_t bar)
+{
+	struct qtest_pci_device *dev;
+
+	if (bar >= NB_BAR) {
+		PMD_DRV_LOG(ERR, "Invalid bar is specified: %u", bar);
+		return NULL;
+	}
+
+	dev = qtest_find_device(s, name);
+	if (dev == NULL) {
+		PMD_DRV_LOG(ERR, "Cannot find specified device: %s", name);
+		return NULL;
+	}
+
+	if (dev->bar[bar].type == QTEST_PCI_BAR_DISABLE) {
+		PMD_DRV_LOG(ERR, "Cannot find valid BAR(%s): %u", name, bar);
+		return NULL;
+	}
+
+	return &dev->bar[bar];
+}
+
+int
+qtest_get_bar_addr(struct qtest_session *s, const char *name,
+		uint8_t bar, uint64_t **addr)
+{
+	struct qtest_pci_bar *bar_ptr;
+
+	bar_ptr = qtest_get_bar(s, name, bar);
+	if (bar_ptr == NULL)
+		return -1;
+
+	*addr = (uint64_t *)bar_ptr->region_start;
+	return 0;
+}
+
+int
+qtest_get_bar_size(struct qtest_session *s, const char *name,
+		uint8_t bar, uint64_t *size)
+{
+	struct qtest_pci_bar *bar_ptr;
+
+	bar_ptr = qtest_get_bar(s, name, bar);
+	if (bar_ptr == NULL)
+		return -1;
+
+	*size = bar_ptr->region_size;
+	return 0;
+}
+
 static void
 qtest_event_send(struct qtest_session *s, char *buf)
 {
diff --git a/drivers/net/virtio/virtio_qtest/qtest_utils.h b/drivers/net/virtio/virtio_qtest/qtest_utils.h
index 6c70552..e41374f 100644
--- a/drivers/net/virtio/virtio_qtest/qtest_utils.h
+++ b/drivers/net/virtio/virtio_qtest/qtest_utils.h
@@ -218,6 +218,62 @@ void qtest_write(struct qtest_session *s, uint64_t addr,
 
 /**
  * @internal
+ * Read pci configuration space of QEMU guest.
+ *
+ * @param s
+ *   The pointer to qtest session structure.
+ * @param name
+ *   The name of pci device.
+ * @param buf
+ *   The pointer to the buffer.
+ * @param len
+ *   Length to read.
+ * @param offset
+ *   Offset of pci configuration space.
+ * @return
+ *   0 on success, negative on error
+ */
+int qtest_read_pci_cfg(struct qtest_session *s, const char *name,
+			void *buf, size_t len, off_t offset);
+
+/**
+ * @internal
+ * Get BAR address of a specified pci device.
+ *
+ * @param s
+ *   The pointer to qtest session structure.
+ * @param name
+ *   The name of pci device.
+ * @param bar
+ *   The index of BAR. Should be between 0 to 5.
+ * @param addr
+ *   The pointer to store BAR address.
+ * @return
+ *   0 on success, negative on error
+ */
+int qtest_get_bar_addr(struct qtest_session *s, const char *name,
+			uint8_t bar, uint64_t **addr);
+
+/**
+ * @internal
+ * Get BAR size of a specified pci device.
+ *
+ * @param s
+ *   The pointer to qtest session structure.
+ * @param name
+ *   The name of pci device.
+ * @param bar
+ *   The index of BAR. Should be between 0 to 5.
+ * @param size
+ *   The pointer to store BAR size.
+ * @return
+ *   0 on success, negative on error
+ */
+int qtest_get_bar_size(struct qtest_session *s, const char *name,
+			uint8_t bar, uint64_t *size);
+
+/**
+ * @internal
  * Initialization function of general device.
  *
  * @param s
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 120+ messages in thread

* [PATCH v5 5/6] virtio: Add QTest support to vtpci abstraction
  2016-03-09  8:33         ` [PATCH v4 11/12] virtio: Add QTest support for virtio-net PMD Tetsuya Mukawa
                             ` (4 preceding siblings ...)
  2016-06-02  3:29           ` [PATCH v5 4/6] virtio, qtest: Add misc functions to handle pci information Tetsuya Mukawa
@ 2016-06-02  3:29           ` Tetsuya Mukawa
  2016-06-02  3:29           ` [PATCH v5 6/6] virtio: Add QTest support for virtio-net PMD Tetsuya Mukawa
                             ` (3 subsequent siblings)
  9 siblings, 0 replies; 120+ messages in thread
From: Tetsuya Mukawa @ 2016-06-02  3:29 UTC (permalink / raw)
  To: dev; +Cc: yuanhan.liu, jianfeng.tan, huawei.xie, Tetsuya Mukawa

The patch adds QTest support to vtpci abstraction.
With this patch, only modern virtio device will be supported.
This implementation will be used by later QTest extension patch
of virtio-net PMD.

Signed-off-by: Tetsuya Mukawa <mukawa@igel.co.jp>
---
 drivers/net/virtio/Makefile                        |   1 +
 drivers/net/virtio/virtio_qtest/virtio_qtest_pci.c | 407 +++++++++++++++++++++
 drivers/net/virtio/virtio_qtest/virtio_qtest_pci.h |  39 ++
 3 files changed, 447 insertions(+)
 create mode 100644 drivers/net/virtio/virtio_qtest/virtio_qtest_pci.c
 create mode 100644 drivers/net/virtio/virtio_qtest/virtio_qtest_pci.h

diff --git a/drivers/net/virtio/Makefile b/drivers/net/virtio/Makefile
index 0b1ccff..1c86d9d 100644
--- a/drivers/net/virtio/Makefile
+++ b/drivers/net/virtio/Makefile
@@ -65,6 +65,7 @@ endif
 
 ifeq ($(CONFIG_RTE_VIRTIO_QTEST),y)
 SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_qtest/qtest_utils.c
+SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_qtest/virtio_qtest_pci.c
 endif
 
 # this lib depends upon:
diff --git a/drivers/net/virtio/virtio_qtest/virtio_qtest_pci.c b/drivers/net/virtio/virtio_qtest/virtio_qtest_pci.c
new file mode 100644
index 0000000..d715b13
--- /dev/null
+++ b/drivers/net/virtio/virtio_qtest/virtio_qtest_pci.c
@@ -0,0 +1,407 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdint.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "../virtio_logs.h"
+#include "../virtio_pci.h"
+#include "../virtqueue.h"
+
+#include "qtest_utils.h"
+#include "virtio_qtest_pci.h"
+
+static inline int
+check_vq_phys_addr_ok(struct virtqueue *vq)
+{
+	/* Virtio PCI device VIRTIO_PCI_QUEUE_PF register is 32bit,
+	 * and only accepts 32 bit page frame number.
+	 * Check if the allocated physical memory exceeds 16TB.
+	 */
+	if ((vq->vq_ring_mem + vq->vq_ring_size - 1) >>
+			(VIRTIO_PCI_QUEUE_ADDR_SHIFT + 32)) {
+		PMD_INIT_LOG(ERR, "vring address shouldn't be above 16TB!");
+		return 0;
+	}
+
+	return 1;
+}
+
+static inline uint8_t
+qtest_read8(struct virtio_hw *hw, uint8_t *addr)
+{
+	return qtest_read(hw->virtio_user_dev, (uint64_t)addr, 'b');
+}
+
+static inline void
+qtest_write8(struct virtio_hw *hw, uint8_t val, uint8_t *addr)
+{
+	return qtest_write(hw->virtio_user_dev, (uint64_t)addr, val, 'b');
+}
+
+static inline uint16_t
+qtest_read16(struct virtio_hw *hw, uint16_t *addr)
+{
+	return qtest_read(hw->virtio_user_dev, (uint64_t)addr, 'w');
+}
+
+static inline void
+qtest_write16(struct virtio_hw *hw, uint16_t val, uint16_t *addr)
+{
+	return qtest_write(hw->virtio_user_dev, (uint64_t)addr, val, 'w');
+}
+
+static inline uint32_t
+qtest_read32(struct virtio_hw *hw, uint32_t *addr)
+{
+	return qtest_read(hw->virtio_user_dev, (uint64_t)addr, 'l');
+}
+
+static inline void
+qtest_write32(struct virtio_hw *hw, uint32_t val, uint32_t *addr)
+{
+	return qtest_write(hw->virtio_user_dev, (uint64_t)addr, val, 'l');
+}
+
+static inline void
+qtest_write64_twopart(struct virtio_hw *hw,
+		uint64_t val, uint32_t *lo, uint32_t *hi)
+{
+	qtest_write32(hw, val & ((1ULL << 32) - 1), lo);
+	qtest_write32(hw, val >> 32,		    hi);
+}
+
+static void
+qtest_read_dev_config(struct virtio_hw *hw, size_t offset,
+		       void *dst, int length)
+{
+	int i;
+	uint8_t *p;
+	uint8_t old_gen, new_gen;
+
+	do {
+		old_gen = qtest_read8(hw, &hw->common_cfg->config_generation);
+
+		p = dst;
+		for (i = 0;  i < length; i++)
+			*p++ = qtest_read8(hw, (uint8_t *)hw->dev_cfg + offset + i);
+
+		new_gen = qtest_read8(hw, &hw->common_cfg->config_generation);
+	} while (old_gen != new_gen);
+}
+
+static void
+qtest_write_dev_config(struct virtio_hw *hw, size_t offset,
+			const void *src, int length)
+{
+	int i;
+	const uint8_t *p = src;
+
+	for (i = 0;  i < length; i++)
+		qtest_write8(hw, *p++, (uint8_t *)hw->dev_cfg + offset + i);
+}
+
+static uint64_t
+qtest_get_features(struct virtio_hw *hw)
+{
+	uint32_t features_lo, features_hi;
+
+	qtest_write32(hw, 0, &hw->common_cfg->device_feature_select);
+	features_lo = qtest_read32(hw, &hw->common_cfg->device_feature);
+
+	qtest_write32(hw, 1, &hw->common_cfg->device_feature_select);
+	features_hi = qtest_read32(hw, &hw->common_cfg->device_feature);
+
+	return ((uint64_t)features_hi << 32) | features_lo;
+}
+
+static void
+qtest_set_features(struct virtio_hw *hw, uint64_t features)
+{
+	qtest_write32(hw, 0, &hw->common_cfg->guest_feature_select);
+	qtest_write32(hw, features & ((1ULL << 32) - 1),
+		&hw->common_cfg->guest_feature);
+
+	qtest_write32(hw, 1, &hw->common_cfg->guest_feature_select);
+	qtest_write32(hw, features >> 32,
+		&hw->common_cfg->guest_feature);
+}
+
+static uint8_t
+qtest_get_status(struct virtio_hw *hw)
+{
+	return qtest_read8(hw, &hw->common_cfg->device_status);
+}
+
+static void
+qtest_set_status(struct virtio_hw *hw, uint8_t status)
+{
+	qtest_write8(hw, status, &hw->common_cfg->device_status);
+}
+
+static void
+qtest_reset(struct virtio_hw *hw)
+{
+	qtest_set_status(hw, VIRTIO_CONFIG_STATUS_RESET);
+	qtest_get_status(hw);
+}
+
+static uint8_t
+qtest_get_isr(struct virtio_hw *hw)
+{
+	return qtest_read8(hw, hw->isr);
+}
+
+static uint16_t
+qtest_set_config_irq(struct virtio_hw *hw, uint16_t vec)
+{
+	qtest_write16(hw, vec, &hw->common_cfg->msix_config);
+	return qtest_read16(hw, &hw->common_cfg->msix_config);
+}
+
+static uint16_t
+qtest_get_queue_num(struct virtio_hw *hw, uint16_t queue_id)
+{
+	qtest_write16(hw, queue_id, &hw->common_cfg->queue_select);
+	return qtest_read16(hw, &hw->common_cfg->queue_size);
+}
+
+static int
+qtest_setup_queue(struct virtio_hw *hw, struct virtqueue *vq)
+{
+	uint64_t desc_addr, avail_addr, used_addr;
+	uint16_t notify_off;
+
+	if (!check_vq_phys_addr_ok(vq))
+		return -1;
+
+	desc_addr = (uint64_t)vq->mz->addr;
+	avail_addr = desc_addr + vq->vq_nentries * sizeof(struct vring_desc);
+	used_addr = RTE_ALIGN_CEIL(avail_addr + offsetof(struct vring_avail,
+							 ring[vq->vq_nentries]),
+				   VIRTIO_PCI_VRING_ALIGN);
+
+	qtest_write16(hw, vq->vq_queue_index, &hw->common_cfg->queue_select);
+
+	qtest_write64_twopart(hw, desc_addr, &hw->common_cfg->queue_desc_lo,
+				      &hw->common_cfg->queue_desc_hi);
+	qtest_write64_twopart(hw, avail_addr, &hw->common_cfg->queue_avail_lo,
+				       &hw->common_cfg->queue_avail_hi);
+	qtest_write64_twopart(hw, used_addr, &hw->common_cfg->queue_used_lo,
+				      &hw->common_cfg->queue_used_hi);
+
+	notify_off = qtest_read16(hw, &hw->common_cfg->queue_notify_off);
+	vq->notify_addr = (void *)((uint8_t *)hw->notify_base +
+				notify_off * hw->notify_off_multiplier);
+
+	qtest_write16(hw, 1, &hw->common_cfg->queue_enable);
+
+	PMD_INIT_LOG(DEBUG, "queue %u addresses:", vq->vq_queue_index);
+	PMD_INIT_LOG(DEBUG, "\t desc_addr: %" PRIx64, desc_addr);
+	PMD_INIT_LOG(DEBUG, "\t aval_addr: %" PRIx64, avail_addr);
+	PMD_INIT_LOG(DEBUG, "\t used_addr: %" PRIx64, used_addr);
+	PMD_INIT_LOG(DEBUG, "\t notify addr: %p (notify offset: %u)",
+		vq->notify_addr, notify_off);
+
+	return 0;
+}
+
+static void
+qtest_del_queue(struct virtio_hw *hw, struct virtqueue *vq)
+{
+	qtest_write16(hw, vq->vq_queue_index, &hw->common_cfg->queue_select);
+
+	qtest_write64_twopart(hw, 0, &hw->common_cfg->queue_desc_lo,
+				  &hw->common_cfg->queue_desc_hi);
+	qtest_write64_twopart(hw, 0, &hw->common_cfg->queue_avail_lo,
+				  &hw->common_cfg->queue_avail_hi);
+	qtest_write64_twopart(hw, 0, &hw->common_cfg->queue_used_lo,
+				  &hw->common_cfg->queue_used_hi);
+
+	qtest_write16(hw, 0, &hw->common_cfg->queue_enable);
+}
+
+static void
+qtest_notify_queue(struct virtio_hw *hw __rte_unused, struct virtqueue *vq)
+{
+	qtest_write16(hw, 1, vq->notify_addr);
+}
+
+const struct virtio_pci_ops modern_qtest_ops = {
+	.read_dev_cfg	= qtest_read_dev_config,
+	.write_dev_cfg	= qtest_write_dev_config,
+	.reset		= qtest_reset,
+	.get_status	= qtest_get_status,
+	.set_status	= qtest_set_status,
+	.get_features	= qtest_get_features,
+	.set_features	= qtest_set_features,
+	.get_isr	= qtest_get_isr,
+	.set_config_irq	= qtest_set_config_irq,
+	.get_queue_num	= qtest_get_queue_num,
+	.setup_queue	= qtest_setup_queue,
+	.del_queue	= qtest_del_queue,
+	.notify_queue	= qtest_notify_queue,
+};
+
+static void *
+get_cfg_addr(struct virtio_hw *hw, struct virtio_pci_cap *cap)
+{
+	uint8_t  bar    = cap->bar;
+	uint32_t length = cap->length;
+	uint32_t offset = cap->offset;
+	uint8_t *base = NULL;
+	uint64_t size = 0;
+
+	if (bar > 5) {
+		PMD_INIT_LOG(ERR, "invalid bar: %u", bar);
+		return NULL;
+	}
+
+	if (offset + length < offset) {
+		PMD_INIT_LOG(ERR, "offset(%u) + length(%u) overflows",
+			offset, length);
+		return NULL;
+	}
+
+	qtest_get_bar_size(hw->virtio_user_dev, "virtio-net", bar, &size);
+	qtest_get_bar_addr(hw->virtio_user_dev, "virtio-net", bar,
+							(uint64_t **)&base);
+
+	if (offset + length > size) {
+		PMD_INIT_LOG(ERR,
+			"invalid cap: overflows bar space: %u > %" PRIu64,
+			offset + length, size);
+		return NULL;
+	}
+
+	if (base == NULL) {
+		PMD_INIT_LOG(ERR, "bar %u base addr is NULL", bar);
+		return NULL;
+	}
+
+	return base + offset;
+}
+
+static int
+virtio_read_caps(struct virtio_hw *hw)
+{
+	uint8_t pos;
+	struct virtio_pci_cap cap;
+	int ret;
+
+	ret = qtest_read_pci_cfg(hw->virtio_user_dev, "virtio-net",
+				 &pos, 1, PCI_CAPABILITY_LIST);
+	if (ret < 0) {
+		PMD_INIT_LOG(DEBUG, "failed to read pci capability list");
+		return -1;
+	}
+
+	while (pos) {
+		ret = qtest_read_pci_cfg(hw->virtio_user_dev, "virtio-net",
+					 &cap, sizeof(cap), pos);
+		if (ret < 0) {
+			PMD_INIT_LOG(ERR,
+				"failed to read pci cap at pos: %x", pos);
+			break;
+		}
+
+		if (cap.cap_vndr != PCI_CAP_ID_VNDR) {
+			PMD_INIT_LOG(DEBUG,
+				"[%2x] skipping non VNDR cap id: %02x",
+				pos, cap.cap_vndr);
+			goto next;
+		}
+
+		PMD_INIT_LOG(DEBUG,
+			"[%2x] cfg type: %u, bar: %u, offset: %04x, len: %u",
+			pos, cap.cfg_type, cap.bar, cap.offset, cap.length);
+
+		switch (cap.cfg_type) {
+		case VIRTIO_PCI_CAP_COMMON_CFG:
+			hw->common_cfg = get_cfg_addr(hw, &cap);
+			break;
+		case VIRTIO_PCI_CAP_NOTIFY_CFG:
+			qtest_read_pci_cfg(hw->virtio_user_dev, "virtio-net",
+						&hw->notify_off_multiplier,
+						4, pos + sizeof(cap));
+			hw->notify_base = get_cfg_addr(hw, &cap);
+			break;
+		case VIRTIO_PCI_CAP_DEVICE_CFG:
+			hw->dev_cfg = get_cfg_addr(hw, &cap);
+			break;
+		case VIRTIO_PCI_CAP_ISR_CFG:
+			hw->isr = get_cfg_addr(hw, &cap);
+			break;
+		}
+
+next:
+		pos = cap.cap_next;
+	}
+
+	if (hw->common_cfg == NULL || hw->notify_base == NULL ||
+	    hw->dev_cfg == NULL    || hw->isr == NULL) {
+		PMD_INIT_LOG(INFO, "no modern virtio pci device found.");
+		return -1;
+	}
+
+	PMD_INIT_LOG(INFO, "found modern virtio pci device.");
+
+	PMD_INIT_LOG(DEBUG, "common cfg mapped at: %p", hw->common_cfg);
+	PMD_INIT_LOG(DEBUG, "device cfg mapped at: %p", hw->dev_cfg);
+	PMD_INIT_LOG(DEBUG, "isr cfg mapped at: %p", hw->isr);
+	PMD_INIT_LOG(DEBUG, "notify base: %p, notify off multiplier: %u",
+		hw->notify_base, hw->notify_off_multiplier);
+
+	return 0;
+}
+
+int
+qtest_vtpci_init(struct virtio_hw *hw, uint32_t *dev_flags)
+{
+	/*
+	 * Try if we can succeed reading virtio pci caps, which exists
+	 * only on modern pci device.
+	 */
+	if (virtio_read_caps(hw) == 0) {
+		PMD_INIT_LOG(INFO, "modern virtio pci detected.");
+		hw->vtpci_ops = &modern_qtest_ops;
+		hw->modern    = 1;
+		/* So far, we don't support LSC interrupt */
+		*dev_flags = 0;
+		return 0;
+	}
+
+	PMD_INIT_LOG(INFO, "So far, legacy virtio device isn't supported");
+	return -EFAULT;
+}
diff --git a/drivers/net/virtio/virtio_qtest/virtio_qtest_pci.h b/drivers/net/virtio/virtio_qtest/virtio_qtest_pci.h
new file mode 100644
index 0000000..6024e27
--- /dev/null
+++ b/drivers/net/virtio/virtio_qtest/virtio_qtest_pci.h
@@ -0,0 +1,39 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _VIRTIO_QTEST_PCI_H
+#define _VIRTIO_QTEST_PCI_H
+
+int qtest_vtpci_init(struct virtio_hw *hw, uint32_t *dev_flags);
+
+#endif
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 120+ messages in thread

* [PATCH v5 6/6] virtio: Add QTest support for virtio-net PMD
  2016-03-09  8:33         ` [PATCH v4 11/12] virtio: Add QTest support for virtio-net PMD Tetsuya Mukawa
                             ` (5 preceding siblings ...)
  2016-06-02  3:29           ` [PATCH v5 5/6] virtio: Add QTest support to vtpci abstraction Tetsuya Mukawa
@ 2016-06-02  3:29           ` Tetsuya Mukawa
  2016-06-02  3:30           ` [PATCH v1 0/2] Supplement patches for virtio-qtest to support LSC interrupt Tetsuya Mukawa
                             ` (2 subsequent siblings)
  9 siblings, 0 replies; 120+ messages in thread
From: Tetsuya Mukawa @ 2016-06-02  3:29 UTC (permalink / raw)
  To: dev; +Cc: yuanhan.liu, jianfeng.tan, huawei.xie, Tetsuya Mukawa

The patch adds a new virtio-net PMD configuration that allows the PMD to
work on host as if the PMD is in VM.
Here is new configuration for virtio-net PMD.
 - CONFIG_RTE_VIRTIO_QTEST
To use this mode, EAL needs map all hugepages as one file. Also the file
should be mapped between (1 << 31) and (1 << 44). And start address
should be aligned by EAL memory size.

To allocate like above, use below '--base-virtaddr' option with
appropriate value.
If EAL cannot allocate memory like above, the PMD will return error
while initialization. In the case, try other values.
Later supplement patches will help allocating EAL memory like above.

To prepare virtio-net device on host, the users need to invoke QEMU
process in special QTest mode. This mode is mainly used for testing QEMU
devices from outer process. In this mode, no guest runs.
Here is QEMU command line.

 $ qemu-system-x86_64 \
     -machine pc-i440fx-1.4,accel=qtest \
     -display none -qtest-log /dev/null \
     -qtest unix:/tmp/socket,server \
     -netdev type=tap,script=/etc/qemu-ifup,id=net0,queues=1 \
     -device
virtio-net-pci,netdev=net0,mq=on,disable-modern=false,addr=3 \
     -chardev socket,id=chr1,path=/tmp/ivshmem,server \
     -device ivshmem,size=1G,chardev=chr1,vectors=1,addr=4

 * Should use QEMU-2.6, or above.
 * QEMU process is needed per port.
 * virtio-1.0 device are only supported.
 * The vhost backends like vhost-net and vhost-user can be specified.
 * In most cases, just using above command is enough, but you can also
   specify other QEMU virtio-net options like mac address.
 * Only checked "pc-i440fx-1.4" machine, but may work with other
   machines.
 * Should not add "--enable-kvm" to QEMU command line.

After invoking QEMU, the PMD can connect to QEMU process using unix
domain sockets. Over these sockets, virtio-net and ivshmem in QEMU
are probed by the PMD.
Here is example of command line.

 $ testpmd -c f -n 1 -m 1024 --no-pci --base-virtaddr=0x400000000 \
      --vdev="eth_virtio_qtest0,qtest=/tmp/socket,ivshmem=/tmp/ivshmem"\
      -- --disable-hw-vlan --txqflags=0xf00 -i

Please specify same unix domain sockets and memory size in both QEMU
and DPDK command lines like above.
The share memory size should be power of 2, because ivshmem only
accepts such memory size.

Signed-off-by: Tetsuya Mukawa <mukawa@igel.co.jp>
---
 drivers/net/virtio/Makefile                        |   1 +
 drivers/net/virtio/virtio_ethdev.c                 |   3 +-
 drivers/net/virtio/virtio_ethdev.h                 |   1 +
 drivers/net/virtio/virtio_qtest/qtest.h            |  95 +++++
 drivers/net/virtio/virtio_qtest/virtio_qtest_dev.c | 393 +++++++++++++++++++++
 drivers/net/virtio/virtio_qtest/virtio_qtest_dev.h |  42 +++
 drivers/net/virtio/virtqueue.h                     |   6 +-
 7 files changed, 536 insertions(+), 5 deletions(-)
 create mode 100644 drivers/net/virtio/virtio_qtest/qtest.h
 create mode 100644 drivers/net/virtio/virtio_qtest/virtio_qtest_dev.c
 create mode 100644 drivers/net/virtio/virtio_qtest/virtio_qtest_dev.h

diff --git a/drivers/net/virtio/Makefile b/drivers/net/virtio/Makefile
index 1c86d9d..5933205 100644
--- a/drivers/net/virtio/Makefile
+++ b/drivers/net/virtio/Makefile
@@ -66,6 +66,7 @@ endif
 ifeq ($(CONFIG_RTE_VIRTIO_QTEST),y)
 SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_qtest/qtest_utils.c
 SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_qtest/virtio_qtest_pci.c
+SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_qtest/virtio_qtest_dev.c
 endif
 
 # this lib depends upon:
diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c
index f8972f2..c35d1c0 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -59,7 +59,6 @@
 #include "virtqueue.h"
 #include "virtio_rxtx.h"
 
-static int eth_virtio_dev_uninit(struct rte_eth_dev *eth_dev);
 static int  virtio_dev_configure(struct rte_eth_dev *dev);
 static int  virtio_dev_start(struct rte_eth_dev *dev);
 static void virtio_dev_stop(struct rte_eth_dev *dev);
@@ -1179,7 +1178,7 @@ eth_virtio_dev_init(struct rte_eth_dev *eth_dev)
 	return 0;
 }
 
-static int
+int
 eth_virtio_dev_uninit(struct rte_eth_dev *eth_dev)
 {
 	struct rte_pci_device *pci_dev;
diff --git a/drivers/net/virtio/virtio_ethdev.h b/drivers/net/virtio/virtio_ethdev.h
index 284afaa..cbb03f5 100644
--- a/drivers/net/virtio/virtio_ethdev.h
+++ b/drivers/net/virtio/virtio_ethdev.h
@@ -114,6 +114,7 @@ uint16_t virtio_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
 		uint16_t nb_pkts);
 
 int eth_virtio_dev_init(struct rte_eth_dev *eth_dev);
+int eth_virtio_dev_uninit(struct rte_eth_dev *eth_dev);
 
 /*
  * The VIRTIO_NET_F_GUEST_TSO[46] features permit the host to send us
diff --git a/drivers/net/virtio/virtio_qtest/qtest.h b/drivers/net/virtio/virtio_qtest/qtest.h
new file mode 100644
index 0000000..534c5a0
--- /dev/null
+++ b/drivers/net/virtio/virtio_qtest/qtest.h
@@ -0,0 +1,95 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2016 IGEL Co., Ltd. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of IGEL Co., Ltd. nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _VIRTIO_QTEST_H_
+#define _VIRTIO_QTEST_H_
+
+#define QTEST_DRV_NAME		        "eth_virtio_qtest"
+#define QTEST_DEVICE_NUM                2
+
+#include <rte_pci.h>
+#include <linux/pci_regs.h>
+
+/* Device information */
+#define VIRTIO_NET_DEVICE_ID            0x1000
+#define VIRTIO_NET_VENDOR_ID            0x1af4
+#define IVSHMEM_DEVICE_ID               0x1110
+#define IVSHMEM_VENDOR_ID               0x1af4
+#define PIIX3_DEVICE_ID                 0x7000
+#define PIIX3_VENDOR_ID                 0x8086
+
+/* ------------------------------------------------------------
+ * IO port mapping of qtest guest
+ * ------------------------------------------------------------
+ * 0x0000 - 0xbfff : not used
+ * 0xc000 - 0xc03f : virtio-net(BAR0)
+ * 0xc040 - 0xffff : not used
+ *
+ * ------------------------------------------------------------
+ * Memory mapping of qtest quest
+ * ------------------------------------------------------------
+ * 0x00000000_00000000 - 0x00000000_3fffffff : not used
+ * 0x00000000_40000000 - 0x00000000_40000fff : virtio-net(BAR1)
+ * 0x00000000_40001000 - 0x00000000_40ffffff : not used
+ * 0x00000000_41000000 - 0x00000000_417fffff : virtio-net(BAR4)
+ * 0x00000000_41800000 - 0x00000000_41ffffff : not used
+ * 0x00000000_42000000 - 0x00000000_420000ff : ivshmem(BAR0)
+ * 0x00000000_42000100 - 0x00000000_42ffffff : not used
+ * 0x00000000_80000000 - 0xffffffff_ffffffff : ivshmem(BAR2)
+ *
+ * We can only specify start address of a region. The region size
+ * will be defined by the device implementation in QEMU.
+ * The size will be pow of 2 according to the PCI specification.
+ * Also, the region start address should be aligned by region size.
+ *
+ * BAR2 of ivshmem will be used to mmap DPDK application memory.
+ * So this address will be dynamically changed, but not to overlap
+ * others, it should be mmaped between above addresses. Such allocation
+ * should be done by EAL.
+ */
+#define VIRTIO_NET_IO_START             0xc000
+#define VIRTIO_NET_MEMORY1_START	0x40000000
+#define VIRTIO_NET_MEMORY2_START	0x41000000
+#define IVSHMEM_MEMORY1_START           0x42000000
+#define IVSHMEM_MEMORY2_START           0x80000000
+
+static inline struct rte_pci_id
+qtest_get_pci_id_of_virtio_net(void)
+{
+	struct rte_pci_id id =  {VIRTIO_NET_DEVICE_ID,
+		VIRTIO_NET_VENDOR_ID, PCI_ANY_ID, PCI_ANY_ID};
+
+	return id;
+}
+
+#endif /* _VIRTIO_QTEST_H_ */
diff --git a/drivers/net/virtio/virtio_qtest/virtio_qtest_dev.c b/drivers/net/virtio/virtio_qtest/virtio_qtest_dev.c
new file mode 100644
index 0000000..dec38ff
--- /dev/null
+++ b/drivers/net/virtio/virtio_qtest/virtio_qtest_dev.c
@@ -0,0 +1,393 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2016 IGEL Co.,Ltd. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of IGEL Co.,Ltd. nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdint.h>
+#include <stdio.h>
+#include <errno.h>
+#include <unistd.h>
+
+#include <rte_malloc.h>
+#include <rte_kvargs.h>
+
+#include "../virtio_logs.h"
+#include "../virtio_ethdev.h"
+
+#include "qtest.h"
+#include "qtest_utils.h"
+#include "virtio_qtest_dev.h"
+#include "virtio_qtest_pci.h"
+
+#define ETH_VIRTIO_NET_ARG_QTEST_PATH           "qtest"
+#define ETH_VIRTIO_NET_ARG_IVSHMEM_PATH         "ivshmem"
+#define ETH_VIRTIO_NET_ARG_VIRTIO_NET_ADDR      "virtio-net-addr"
+#define ETH_VIRTIO_NET_ARG_IVSHMEM_ADDR         "ivshmem-addr"
+
+static const char *valid_qtest_args[] = {
+	ETH_VIRTIO_NET_ARG_QTEST_PATH,
+	ETH_VIRTIO_NET_ARG_IVSHMEM_PATH,
+	ETH_VIRTIO_NET_ARG_VIRTIO_NET_ADDR,
+	ETH_VIRTIO_NET_ARG_IVSHMEM_ADDR,
+	NULL
+};
+
+static int
+get_socket_path_arg(const char *key __rte_unused,
+		const char *value, void *extra_args)
+{
+	char **p;
+
+	if ((value == NULL) || (extra_args == NULL))
+		return -EINVAL;
+
+	p = extra_args;
+	*p = strdup(value);
+
+	if (*p == NULL)
+		return -1;
+
+	return 0;
+}
+
+static int
+get_pci_addr_arg(const char *key __rte_unused,
+		const char *value, void *extra_args)
+{
+	struct rte_pci_addr *addr = extra_args;
+
+	if ((value == NULL) || (extra_args == NULL))
+		return -EINVAL;
+
+	if (eal_parse_pci_DomBDF(value, addr) != 0)
+		return -1;
+
+	if (addr->domain != 0)
+		return -1;
+
+	return 0;
+}
+
+static int
+virtio_net_eth_dev_free(struct rte_eth_dev *eth_dev)
+{
+	struct virtio_hw *hw;
+	int ret;
+
+	ret = rte_eth_dev_release_port(eth_dev);
+	if (ret < 0) {
+		PMD_INIT_LOG(ERR, "cannot release a port\n");
+		return -1;
+	}
+
+	hw = eth_dev->data->dev_private;
+	rte_free(hw);
+
+	return 0;
+}
+
+static struct rte_eth_dev *
+virtio_net_eth_dev_alloc(const char *name)
+{
+	struct rte_eth_dev *eth_dev;
+	struct rte_eth_dev_data *data;
+	struct virtio_hw *hw;
+	int ret;
+
+	eth_dev = rte_eth_dev_allocate(name, RTE_ETH_DEV_VIRTUAL);
+	if (eth_dev == NULL) {
+		PMD_INIT_LOG(ERR, "cannot alloc a port\n");
+		return NULL;
+	}
+
+	data = eth_dev->data;
+
+	hw = rte_zmalloc(NULL, sizeof(*hw), 0);
+	if (hw == NULL) {
+		PMD_INIT_LOG(ERR, "malloc virtio_hw failed\n");
+		ret = rte_eth_dev_release_port(eth_dev);
+		if (ret < 0)
+			rte_panic("cannot release a port");
+		return NULL;
+	}
+
+	data->dev_private = hw;
+	data->numa_node = SOCKET_ID_ANY;
+	data->kdrv = RTE_KDRV_NONE;
+	data->dev_flags = RTE_ETH_DEV_DETACHABLE;
+	eth_dev->pci_dev = NULL;
+	eth_dev->driver = NULL;
+
+	return eth_dev;
+}
+
+static int
+virtio_net_eth_pmd_parse_socket_path(struct rte_kvargs *kvlist,
+		const char *option, char **path)
+{
+	int ret;
+
+	if (rte_kvargs_count(kvlist, option) == 1) {
+		ret = rte_kvargs_process(kvlist, option,
+				&get_socket_path_arg, path);
+		if (ret != 0) {
+			PMD_INIT_LOG(ERR,
+					"Failed to connect to %s socket", option);
+			return -1;
+		}
+	} else {
+		PMD_INIT_LOG(ERR, "No argument specified for %s", option);
+		return -1;
+	}
+
+	return 0;
+}
+
+static int
+virtio_net_eth_pmd_parse_pci_addr(struct rte_kvargs *kvlist,
+		const char *option, struct rte_pci_addr *addr,
+		struct rte_pci_addr *default_addr)
+{
+	int ret;
+
+	if (rte_kvargs_count(kvlist, option) == 1) {
+		ret = rte_kvargs_process(kvlist, option,
+				&get_pci_addr_arg, addr);
+		if (ret != 0) {
+			PMD_INIT_LOG(ERR,
+					"Specified invalid address in '%s'", option);
+			return -1;
+		}
+	} else
+		/* copy default pci address */
+		*addr = *default_addr;
+
+	return 0;
+}
+
+static int
+virtio_prepare_target_devices(struct qtest_pci_device *devices,
+			struct rte_kvargs *kvlist)
+{
+	struct qtest_pci_device *virtio_net, *ivshmem;
+	struct rte_pci_addr default_addr;
+	const struct rte_memseg *ms;
+	int ret;
+
+	ms = rte_eal_get_physmem_layout();
+	/* if EAL memory size isn't pow of 2, ivshmem will refuse it */
+	if ((ms[0].len & (ms[0].len - 1)) != 0) {
+		PMD_DRV_LOG(ERR, "memory size must be power of 2\n");
+		return -1;
+	}
+
+	/* EAL memory should be mapped under 16TB */
+	if ((uint64_t)ms[0].addr >> (VIRTIO_PCI_QUEUE_ADDR_SHIFT + 32)) {
+		PMD_DRV_LOG(ERR, "EAL memory shouldn't be mapped above 16TB");
+		return -1;
+	}
+
+	/* EAL memory should be mapped above IVSHMEM_MEMORY2_START */
+	if ((uint64_t)ms[0].addr < IVSHMEM_MEMORY2_START) {
+		PMD_DRV_LOG(ERR, "EAL memory shouldn't be mapped under 0x%x",
+				IVSHMEM_MEMORY2_START);
+		return -1;
+	}
+
+	virtio_net = &devices[0];
+	ivshmem = &devices[1];
+
+	virtio_net->name = "virtio-net";
+	virtio_net->device_id = VIRTIO_NET_DEVICE_ID;
+	virtio_net->vendor_id = VIRTIO_NET_VENDOR_ID;
+	virtio_net->init = qtest_init_pci_device;
+	virtio_net->bar[0].addr = PCI_BASE_ADDRESS_0;
+	virtio_net->bar[0].type = QTEST_PCI_BAR_IO;
+	virtio_net->bar[0].region_start = VIRTIO_NET_IO_START;
+	virtio_net->bar[1].addr = PCI_BASE_ADDRESS_1;
+	virtio_net->bar[1].type = QTEST_PCI_BAR_MEMORY_32;
+	virtio_net->bar[1].region_start = VIRTIO_NET_MEMORY1_START;
+	virtio_net->bar[4].addr = PCI_BASE_ADDRESS_4;
+	virtio_net->bar[4].type = QTEST_PCI_BAR_MEMORY_64;
+	virtio_net->bar[4].region_start = VIRTIO_NET_MEMORY2_START;
+
+	ivshmem->name = "ivshmem";
+	ivshmem->device_id = IVSHMEM_DEVICE_ID;
+	ivshmem->vendor_id = IVSHMEM_VENDOR_ID;
+	ivshmem->init = qtest_init_pci_device;
+	ivshmem->bar[0].addr = PCI_BASE_ADDRESS_0;
+	ivshmem->bar[0].type = QTEST_PCI_BAR_MEMORY_32;
+	ivshmem->bar[0].region_start = IVSHMEM_MEMORY1_START;
+	ivshmem->bar[2].addr = PCI_BASE_ADDRESS_2;
+	ivshmem->bar[2].type = QTEST_PCI_BAR_MEMORY_64;
+	/* In host mode, only one memory segment is vaild */
+	ivshmem->bar[2].region_start = (uint64_t)ms[0].addr;
+
+	/*
+	 * Set pci addresses specified by command line.
+	 * QTest utils will only check specified pci address.
+	 * If it's wrong, a target device won't be found.
+	 */
+	default_addr.domain = 0;
+	default_addr.bus = 0;
+	default_addr.function = 0;
+
+	default_addr.devid = 3;
+	ret = virtio_net_eth_pmd_parse_pci_addr(kvlist,
+			ETH_VIRTIO_NET_ARG_VIRTIO_NET_ADDR,
+			&virtio_net->specified_addr, &default_addr);
+	if (ret < 0)
+		return -1;
+
+	default_addr.devid = 4;
+	ret = virtio_net_eth_pmd_parse_pci_addr(kvlist,
+			ETH_VIRTIO_NET_ARG_IVSHMEM_ADDR,
+			&ivshmem->specified_addr, &default_addr);
+	if (ret < 0)
+		return -1;
+
+	return 0;
+}
+/*
+ * Initialization when "CONFIG_RTE_VIRTIO_VDEV_QTEST" is enabled.
+ */
+static int
+rte_qtest_virtio_pmd_init(const char *name, const char *params)
+{
+	struct rte_kvargs *kvlist;
+	struct virtio_hw *hw = NULL;
+	struct rte_eth_dev *eth_dev = NULL;
+	char *qtest_path = NULL, *ivshmem_path = NULL;
+	struct qtest_pci_device devices[QTEST_DEVICE_NUM];
+	int ret;
+
+	if (params == NULL || params[0] == '\0')
+		return -EINVAL;
+
+	kvlist = rte_kvargs_parse(params, valid_qtest_args);
+	if (kvlist == NULL) {
+		PMD_INIT_LOG(ERR, "error when parsing param");
+		return -EFAULT;
+	}
+
+	ret = virtio_net_eth_pmd_parse_socket_path(kvlist,
+			ETH_VIRTIO_NET_ARG_IVSHMEM_PATH, &ivshmem_path);
+	if (ret < 0)
+		goto error;
+
+	ret = virtio_net_eth_pmd_parse_socket_path(kvlist,
+			ETH_VIRTIO_NET_ARG_QTEST_PATH, &qtest_path);
+	if (ret < 0)
+		goto error;
+
+	ret = virtio_prepare_target_devices(devices, kvlist);
+	if (ret < 0)
+		goto error;
+
+	eth_dev = virtio_net_eth_dev_alloc(name);
+	if (eth_dev == NULL)
+		goto error;
+
+	hw = eth_dev->data->dev_private;
+	hw->virtio_user_dev = qtest_vdev_init(qtest_path, ivshmem_path,
+			devices, QTEST_DEVICE_NUM);
+	if (hw->virtio_user_dev == NULL)
+		goto error;
+
+	ret = qtest_vtpci_init(hw, &eth_dev->data->dev_flags);
+	if (ret  < 0)
+		goto error;
+
+	/* originally, this will be called in rte_eal_pci_probe() */
+	ret = eth_virtio_dev_init(eth_dev);
+	if (ret < 0)
+		goto error;
+
+	eth_dev->driver = NULL;
+	eth_dev->data->dev_flags |= RTE_ETH_DEV_DETACHABLE;
+	eth_dev->data->kdrv = RTE_KDRV_NONE;
+	eth_dev->data->drv_name = QTEST_DRV_NAME;
+
+	free(qtest_path);
+	free(ivshmem_path);
+	rte_kvargs_free(kvlist);
+	return 0;
+
+error:
+	if (hw != NULL && hw->virtio_user_dev != NULL)
+		qtest_vdev_uninit(hw->virtio_user_dev);
+	if (eth_dev)
+		virtio_net_eth_dev_free(eth_dev);
+	if (qtest_path)
+		free(qtest_path);
+	if (ivshmem_path)
+		free(ivshmem_path);
+	rte_kvargs_free(kvlist);
+	return -EFAULT;
+}
+
+static int
+rte_qtest_virtio_pmd_uninit(const char *name)
+{
+	struct rte_eth_dev *eth_dev;
+	struct virtio_hw *hw;
+	int ret;
+
+	if (name == NULL)
+		return -EINVAL;
+
+	/* find the ethdev entry */
+	eth_dev = rte_eth_dev_allocated(name);
+	if (eth_dev == NULL)
+		return -ENODEV;
+
+	ret = eth_virtio_dev_uninit(eth_dev);
+	if (ret != 0)
+		return -EFAULT;
+
+	hw = eth_dev->data->dev_private;
+	qtest_vdev_uninit(hw->virtio_user_dev);
+
+	ret = virtio_net_eth_dev_free(eth_dev);
+	if (ret != 0)
+		return -EFAULT;
+
+	return 0;
+}
+
+static struct rte_driver rte_qtest_virtio_driver = {
+	.name   = QTEST_DRV_NAME,
+	.type   = PMD_VDEV,
+	.init   = rte_qtest_virtio_pmd_init,
+	.uninit = rte_qtest_virtio_pmd_uninit,
+};
+
+PMD_REGISTER_DRIVER(rte_qtest_virtio_driver);
diff --git a/drivers/net/virtio/virtio_qtest/virtio_qtest_dev.h b/drivers/net/virtio/virtio_qtest/virtio_qtest_dev.h
new file mode 100644
index 0000000..82f1eec
--- /dev/null
+++ b/drivers/net/virtio/virtio_qtest/virtio_qtest_dev.h
@@ -0,0 +1,42 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2016 IGEL Co., Ltd. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of IGEL Co., Ltd. nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _VIRTIO_QTEST_VDEV_H
+#define _VIRTIO_QTEST_VDEV_H
+
+#include <limits.h>
+#include "../virtio_pci.h"
+
+const struct virtio_pci_ops qtest_ops;
+
+#endif
diff --git a/drivers/net/virtio/virtqueue.h b/drivers/net/virtio/virtqueue.h
index 8ffc366..8edb85f 100644
--- a/drivers/net/virtio/virtqueue.h
+++ b/drivers/net/virtio/virtqueue.h
@@ -66,13 +66,13 @@ struct rte_mbuf;
 
 #define VIRTQUEUE_MAX_NAME_SZ 32
 
-#ifdef RTE_VIRTIO_VDEV
+#if defined(RTE_VIRTIO_VDEV) || defined(RTE_VIRTIO_QTEST)
 #define MBUF_DATA_DMA_ADDR(mb, offset) \
 	((uint64_t)((uintptr_t)(*(void **)((uintptr_t)mb + offset)) \
 			+ (mb)->data_off))
-#else /* RTE_VIRTIO_VDEV */
+#else /* RTE_VIRTIO_VDEV or RTE_VIRTIO_QTEST */
 #define MBUF_DATA_DMA_ADDR(mb, offset) rte_mbuf_data_dma_addr(mb)
-#endif /* RTE_VIRTIO_VDEV */
+#endif /* RTE_VIRTIO_VDEV or RTE_VIRTIO_QTEST */
 
 #define VTNET_SQ_RQ_QUEUE_IDX 0
 #define VTNET_SQ_TQ_QUEUE_IDX 1
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 120+ messages in thread

* [PATCH v1 0/2] Supplement patches for virtio-qtest to support LSC interrupt
  2016-03-09  8:33         ` [PATCH v4 11/12] virtio: Add QTest support for virtio-net PMD Tetsuya Mukawa
                             ` (6 preceding siblings ...)
  2016-06-02  3:29           ` [PATCH v5 6/6] virtio: Add QTest support for virtio-net PMD Tetsuya Mukawa
@ 2016-06-02  3:30           ` Tetsuya Mukawa
  2016-06-02  3:30           ` [PATCH v1 1/2] virtio: Handle interrupt things under vtpci abstraction Tetsuya Mukawa
  2016-06-02  3:30           ` [PATCH v1 2/2] virtio, qtest: Add functionality to handle interrupt Tetsuya Mukawa
  9 siblings, 0 replies; 120+ messages in thread
From: Tetsuya Mukawa @ 2016-06-02  3:30 UTC (permalink / raw)
  To: dev; +Cc: yuanhan.liu, jianfeng.tan, huawei.xie, Tetsuya Mukawa

This is patches to support LSC interrupt handling for virtio-qtest.
This patches should be on below patches.
 - [PATCH v5 0/6] Virtio-net PMD: QEMU QTest extension for container

To support LSC interrupts, vtpci abstraction was expanded to handle interrupt from
pci devices.
Actually, this PMD is handling a virtual virtio-net device. So handling interrupts
are a bit different from actual pci devices. In this case, all interrupts are come
from unix domain socket connected to QEMU.


Tetsuya Mukawa (2):
  virtio: Handle interrupt things under vtpci abstraction
  virtio, qtest: Add functionality to handle interrupt

 drivers/net/virtio/virtio_ethdev.c                 |  17 +-
 drivers/net/virtio/virtio_pci.c                    |  86 +++++---
 drivers/net/virtio/virtio_pci.h                    |   7 +
 drivers/net/virtio/virtio_qtest/qtest.h            |   3 +-
 drivers/net/virtio/virtio_qtest/qtest_utils.c      | 225 ++++++++++++++++++++-
 drivers/net/virtio/virtio_qtest/qtest_utils.h      |  68 ++++++-
 drivers/net/virtio/virtio_qtest/virtio_qtest_dev.c |  23 ++-
 drivers/net/virtio/virtio_qtest/virtio_qtest_pci.c |  64 ++++--
 8 files changed, 432 insertions(+), 61 deletions(-)

-- 
2.7.4

^ permalink raw reply	[flat|nested] 120+ messages in thread

* [PATCH v1 1/2] virtio: Handle interrupt things under vtpci abstraction
  2016-03-09  8:33         ` [PATCH v4 11/12] virtio: Add QTest support for virtio-net PMD Tetsuya Mukawa
                             ` (7 preceding siblings ...)
  2016-06-02  3:30           ` [PATCH v1 0/2] Supplement patches for virtio-qtest to support LSC interrupt Tetsuya Mukawa
@ 2016-06-02  3:30           ` Tetsuya Mukawa
  2016-06-02  3:30           ` [PATCH v1 2/2] virtio, qtest: Add functionality to handle interrupt Tetsuya Mukawa
  9 siblings, 0 replies; 120+ messages in thread
From: Tetsuya Mukawa @ 2016-06-02  3:30 UTC (permalink / raw)
  To: dev; +Cc: yuanhan.liu, jianfeng.tan, huawei.xie, Tetsuya Mukawa

So far, interrupts from PCI devices are handled in virtio_ethdev
directly. The patch changes it, and try to handle it under vtpci
abstraction. The patch is needed because virtio-qtest needs to handle
interrupts from virtual pci devices.

Signed-off-by: Tetsuya Mukawa <mukawa@igel.co.jp>
---
 drivers/net/virtio/virtio_ethdev.c | 10 ++---
 drivers/net/virtio/virtio_pci.c    | 86 ++++++++++++++++++++++++++------------
 drivers/net/virtio/virtio_pci.h    |  7 ++++
 3 files changed, 72 insertions(+), 31 deletions(-)

diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c
index c35d1c0..8b5fb66 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -1011,7 +1011,7 @@ virtio_interrupt_handler(__rte_unused struct rte_intr_handle *handle,
 	isr = vtpci_isr(hw);
 	PMD_DRV_LOG(INFO, "interrupt status = %#x", isr);
 
-	if (rte_intr_enable(&dev->pci_dev->intr_handle) < 0)
+	if (hw->vtpci_ops->intr_enable(hw) < 0)
 		PMD_DRV_LOG(ERR, "interrupt enable failed");
 
 	if (isr & VIRTIO_PCI_ISR_CONFIG) {
@@ -1170,7 +1170,7 @@ eth_virtio_dev_init(struct rte_eth_dev *eth_dev)
 
 	/* Setup interrupt callback  */
 	if (eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
-		rte_intr_callback_register(&pci_dev->intr_handle,
+		hw->vtpci_ops->intr_cb_register(hw,
 				   virtio_interrupt_handler, eth_dev);
 
 	virtio_dev_cq_start(eth_dev);
@@ -1205,7 +1205,7 @@ eth_virtio_dev_uninit(struct rte_eth_dev *eth_dev)
 
 	/* reset interrupt callback  */
 	if (eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
-		rte_intr_callback_unregister(&pci_dev->intr_handle,
+		hw->vtpci_ops->intr_cb_unregister(hw,
 						virtio_interrupt_handler,
 						eth_dev);
 	rte_eal_pci_unmap_device(pci_dev);
@@ -1294,7 +1294,7 @@ virtio_dev_start(struct rte_eth_dev *dev)
 			return -ENOTSUP;
 		}
 
-		if (rte_intr_enable(&dev->pci_dev->intr_handle) < 0) {
+		if (hw->vtpci_ops->intr_enable(hw) < 0) {
 			PMD_DRV_LOG(ERR, "interrupt enable failed");
 			return -EIO;
 		}
@@ -1398,7 +1398,7 @@ virtio_dev_stop(struct rte_eth_dev *dev)
 	hw->started = 0;
 
 	if (dev->data->dev_conf.intr_conf.lsc)
-		rte_intr_disable(&dev->pci_dev->intr_handle);
+		hw->vtpci_ops->intr_disable(hw);
 
 	memset(&link, 0, sizeof(link));
 	virtio_dev_atomic_write_link_status(dev, &link);
diff --git a/drivers/net/virtio/virtio_pci.c b/drivers/net/virtio/virtio_pci.c
index 6bd239c..acbc9b1 100644
--- a/drivers/net/virtio/virtio_pci.c
+++ b/drivers/net/virtio/virtio_pci.c
@@ -71,6 +71,32 @@ check_vq_phys_addr_ok(struct virtqueue *vq)
 	return 1;
 }
 
+static int
+intr_cb_register(struct virtio_hw *hw,
+			rte_intr_callback_fn cb, void *cb_arg)
+{
+	return rte_intr_callback_register(&hw->dev->intr_handle, cb, cb_arg);
+}
+
+static int
+intr_cb_unregister(struct virtio_hw *hw,
+			rte_intr_callback_fn cb, void *cb_arg)
+{
+	return rte_intr_callback_register(&hw->dev->intr_handle, cb, cb_arg);
+}
+
+static int
+intr_enable(struct virtio_hw *hw)
+{
+	return rte_intr_enable(&hw->dev->intr_handle);
+}
+
+static int
+intr_disable(struct virtio_hw *hw)
+{
+	return rte_intr_disable(&hw->dev->intr_handle);
+}
+
 static void
 legacy_read_dev_config(struct virtio_hw *hw, size_t offset,
 		       void *dst, int length)
@@ -234,19 +260,23 @@ legacy_virtio_resource_init(struct rte_pci_device *pci_dev,
 }
 
 static const struct virtio_pci_ops legacy_ops = {
-	.read_dev_cfg	= legacy_read_dev_config,
-	.write_dev_cfg	= legacy_write_dev_config,
-	.reset		= legacy_reset,
-	.get_status	= legacy_get_status,
-	.set_status	= legacy_set_status,
-	.get_features	= legacy_get_features,
-	.set_features	= legacy_set_features,
-	.get_isr	= legacy_get_isr,
-	.set_config_irq	= legacy_set_config_irq,
-	.get_queue_num	= legacy_get_queue_num,
-	.setup_queue	= legacy_setup_queue,
-	.del_queue	= legacy_del_queue,
-	.notify_queue	= legacy_notify_queue,
+	.read_dev_cfg		= legacy_read_dev_config,
+	.write_dev_cfg		= legacy_write_dev_config,
+	.reset			= legacy_reset,
+	.get_status		= legacy_get_status,
+	.set_status		= legacy_set_status,
+	.get_features		= legacy_get_features,
+	.set_features		= legacy_set_features,
+	.get_isr		= legacy_get_isr,
+	.set_config_irq		= legacy_set_config_irq,
+	.get_queue_num		= legacy_get_queue_num,
+	.setup_queue		= legacy_setup_queue,
+	.del_queue		= legacy_del_queue,
+	.notify_queue		= legacy_notify_queue,
+	.intr_cb_register	= intr_cb_register,
+	.intr_cb_unregister	= intr_cb_unregister,
+	.intr_enable		= intr_enable,
+	.intr_disable		= intr_disable,
 };
 
 
@@ -450,19 +480,23 @@ modern_notify_queue(struct virtio_hw *hw __rte_unused, struct virtqueue *vq)
 }
 
 static const struct virtio_pci_ops modern_ops = {
-	.read_dev_cfg	= modern_read_dev_config,
-	.write_dev_cfg	= modern_write_dev_config,
-	.reset		= modern_reset,
-	.get_status	= modern_get_status,
-	.set_status	= modern_set_status,
-	.get_features	= modern_get_features,
-	.set_features	= modern_set_features,
-	.get_isr	= modern_get_isr,
-	.set_config_irq	= modern_set_config_irq,
-	.get_queue_num	= modern_get_queue_num,
-	.setup_queue	= modern_setup_queue,
-	.del_queue	= modern_del_queue,
-	.notify_queue	= modern_notify_queue,
+	.read_dev_cfg		= modern_read_dev_config,
+	.write_dev_cfg		= modern_write_dev_config,
+	.reset			= modern_reset,
+	.get_status		= modern_get_status,
+	.set_status		= modern_set_status,
+	.get_features		= modern_get_features,
+	.set_features		= modern_set_features,
+	.get_isr		= modern_get_isr,
+	.set_config_irq		= modern_set_config_irq,
+	.get_queue_num		= modern_get_queue_num,
+	.setup_queue		= modern_setup_queue,
+	.del_queue		= modern_del_queue,
+	.notify_queue		= modern_notify_queue,
+	.intr_cb_register	= intr_cb_register,
+	.intr_cb_unregister	= intr_cb_unregister,
+	.intr_enable		= intr_enable,
+	.intr_disable		= intr_disable,
 };
 
 
diff --git a/drivers/net/virtio/virtio_pci.h b/drivers/net/virtio/virtio_pci.h
index d10d013..a74aa02 100644
--- a/drivers/net/virtio/virtio_pci.h
+++ b/drivers/net/virtio/virtio_pci.h
@@ -237,6 +237,13 @@ struct virtio_pci_ops {
 	int (*setup_queue)(struct virtio_hw *hw, struct virtqueue *vq);
 	void (*del_queue)(struct virtio_hw *hw, struct virtqueue *vq);
 	void (*notify_queue)(struct virtio_hw *hw, struct virtqueue *vq);
+
+	int (*intr_cb_register)(struct virtio_hw *hw,
+			rte_intr_callback_fn cb, void *cb_arg);
+	int (*intr_cb_unregister)(struct virtio_hw *hw,
+			rte_intr_callback_fn cb, void *cb_arg);
+	int (*intr_enable)(struct virtio_hw *hw);
+	int (*intr_disable)(struct virtio_hw *hw);
 };
 
 struct virtio_net_config;
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 120+ messages in thread

* [PATCH v1 2/2] virtio, qtest: Add functionality to handle interrupt
  2016-03-09  8:33         ` [PATCH v4 11/12] virtio: Add QTest support for virtio-net PMD Tetsuya Mukawa
                             ` (8 preceding siblings ...)
  2016-06-02  3:30           ` [PATCH v1 1/2] virtio: Handle interrupt things under vtpci abstraction Tetsuya Mukawa
@ 2016-06-02  3:30           ` Tetsuya Mukawa
  9 siblings, 0 replies; 120+ messages in thread
From: Tetsuya Mukawa @ 2016-06-02  3:30 UTC (permalink / raw)
  To: dev; +Cc: yuanhan.liu, jianfeng.tan, huawei.xie, Tetsuya Mukawa

The patch adds functionality to handle interrupt from pci device of
QEMU guest. To handle the interrupts, the patch adds to initialize piix3
pci device.

Signed-off-by: Tetsuya Mukawa <mukawa@igel.co.jp>
---
 drivers/net/virtio/virtio_ethdev.c                 |   7 +-
 drivers/net/virtio/virtio_qtest/qtest.h            |   3 +-
 drivers/net/virtio/virtio_qtest/qtest_utils.c      | 225 ++++++++++++++++++++-
 drivers/net/virtio/virtio_qtest/qtest_utils.h      |  68 ++++++-
 drivers/net/virtio/virtio_qtest/virtio_qtest_dev.c |  23 ++-
 drivers/net/virtio/virtio_qtest/virtio_qtest_pci.c |  64 ++++--
 6 files changed, 360 insertions(+), 30 deletions(-)

diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c
index 8b5fb66..e8737ab 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -1043,7 +1043,6 @@ eth_virtio_dev_init(struct rte_eth_dev *eth_dev)
 	struct virtio_net_config *config;
 	struct virtio_net_config local_config;
 	struct rte_pci_device *pci_dev;
-	uint32_t dev_flags = RTE_ETH_DEV_DETACHABLE;
 	int ret;
 
 	RTE_BUILD_BUG_ON(RTE_PKTMBUF_HEADROOM < sizeof(struct virtio_net_hdr_mrg_rxbuf));
@@ -1067,8 +1066,9 @@ eth_virtio_dev_init(struct rte_eth_dev *eth_dev)
 
 	pci_dev = eth_dev->pci_dev;
 
+	eth_dev->data->dev_flags |= RTE_ETH_DEV_DETACHABLE;
 	if (pci_dev) {
-		ret = vtpci_init(pci_dev, hw, &dev_flags);
+		ret = vtpci_init(pci_dev, hw, &eth_dev->data->dev_flags);
 		if (ret)
 			return ret;
 	}
@@ -1086,10 +1086,9 @@ eth_virtio_dev_init(struct rte_eth_dev *eth_dev)
 
 	/* If host does not support status then disable LSC */
 	if (!vtpci_with_feature(hw, VIRTIO_NET_F_STATUS))
-		dev_flags &= ~RTE_ETH_DEV_INTR_LSC;
+		eth_dev->data->dev_flags &= ~RTE_ETH_DEV_INTR_LSC;
 
 	rte_eth_copy_pci_info(eth_dev, pci_dev);
-	eth_dev->data->dev_flags = dev_flags;
 
 	rx_func_get(eth_dev);
 
diff --git a/drivers/net/virtio/virtio_qtest/qtest.h b/drivers/net/virtio/virtio_qtest/qtest.h
index 534c5a0..7e8c093 100644
--- a/drivers/net/virtio/virtio_qtest/qtest.h
+++ b/drivers/net/virtio/virtio_qtest/qtest.h
@@ -35,7 +35,7 @@
 #define _VIRTIO_QTEST_H_
 
 #define QTEST_DRV_NAME		        "eth_virtio_qtest"
-#define QTEST_DEVICE_NUM                2
+#define QTEST_DEVICE_NUM                3
 
 #include <rte_pci.h>
 #include <linux/pci_regs.h>
@@ -43,6 +43,7 @@
 /* Device information */
 #define VIRTIO_NET_DEVICE_ID            0x1000
 #define VIRTIO_NET_VENDOR_ID            0x1af4
+#define VIRTIO_NET_IRQ_NUM              10
 #define IVSHMEM_DEVICE_ID               0x1110
 #define IVSHMEM_VENDOR_ID               0x1af4
 #define PIIX3_DEVICE_ID                 0x7000
diff --git a/drivers/net/virtio/virtio_qtest/qtest_utils.c b/drivers/net/virtio/virtio_qtest/qtest_utils.c
index 27118fb..c5a3a7a 100644
--- a/drivers/net/virtio/virtio_qtest/qtest_utils.c
+++ b/drivers/net/virtio/virtio_qtest/qtest_utils.c
@@ -36,6 +36,7 @@
 #include <sys/un.h>
 #include <pthread.h>
 #include <fcntl.h>
+#include <sys/eventfd.h>
 
 #include <rte_malloc.h>
 
@@ -43,6 +44,12 @@
 #include "../virtio_ethdev.h"
 #include "qtest_utils.h"
 
+/* PIIX3 configuration registers */
+#define PIIX3_REG_ADDR_PIRQA            0x60
+#define PIIX3_REG_ADDR_PIRQB            0x61
+#define PIIX3_REG_ADDR_PIRQC            0x62
+#define PIIX3_REG_ADDR_PIRQD            0x63
+
 /* ivshmem configuration */
 #define IVSHMEM_PROTOCOL_VERSION        0
 
@@ -74,6 +81,14 @@ struct qtest_session {
 	size_t evq_total_len;
 
 	union qtest_pipefds msgfds;
+
+	int irqno;
+	pthread_t intr_th;
+	int intr_th_started;
+	int eventfd;
+	rte_atomic16_t enable_intr;
+	rte_intr_callback_fn cb;
+	void *cb_arg;
 };
 
 static int
@@ -230,6 +245,29 @@ qtest_pci_inb(struct qtest_session *s, uint8_t bus, uint8_t device,
 	return (tmp >> ((offset & 0x3) * 8)) & 0xff;
 }
 
+static void
+qtest_pci_outb(struct qtest_session *s, uint8_t bus, uint8_t device,
+		uint8_t function, uint8_t offset, uint8_t value)
+{
+	uint32_t addr, tmp, pos;
+
+	addr = PCI_CONFIG_ADDR(bus, device, function, offset);
+	pos = (offset % 4) * 8;
+
+	if (pthread_mutex_lock(&s->qtest_session_lock) < 0)
+		rte_panic("Cannot lock mutex\n");
+
+	qtest_raw_out(s, 0xcf8, addr, 'l');
+	tmp = qtest_raw_in(s, 0xcfc, 'l');
+	tmp = (tmp & ~(0xff << pos)) | (value << pos);
+
+	qtest_raw_out(s, 0xcf8, addr, 'l');
+	qtest_raw_out(s, 0xcfc, tmp, 'l');
+
+	if (pthread_mutex_unlock(&s->qtest_session_lock) < 0)
+		rte_panic("Cannot unlock mutex\n");
+}
+
 static uint32_t
 qtest_pci_inl(struct qtest_session *s, uint8_t bus, uint8_t device,
 		uint8_t function, uint8_t offset)
@@ -466,15 +504,112 @@ qtest_get_bar_size(struct qtest_session *s, const char *name,
 	return 0;
 }
 
+int
+qtest_intr_enable(struct qtest_session *s)
+{
+	rte_atomic16_set(&s->enable_intr, 1);
+
+	return 0;
+}
+
+int
+qtest_intr_disable(struct qtest_session *s)
+{
+	rte_atomic16_set(&s->enable_intr, 0);
+
+	return 0;
+}
+
+void
+qtest_intr_callback_register(struct qtest_session *s,
+		rte_intr_callback_fn cb, void *cb_arg)
+{
+	s->cb = cb;
+	s->cb_arg = cb_arg;
+	rte_atomic16_set(&s->enable_intr, 1);
+}
+
+void
+qtest_intr_callback_unregister(struct qtest_session *s,
+		rte_intr_callback_fn cb __rte_unused,
+		void *cb_arg __rte_unused)
+{
+	rte_atomic16_set(&s->enable_intr, 0);
+	s->cb = NULL;
+	s->cb_arg = NULL;
+}
+
+static void *
+qtest_intr_handler(void *data) {
+	struct qtest_session *s = (struct qtest_session *)data;
+	eventfd_t value;
+	int ret;
+
+	for (;;) {
+		ret = eventfd_read(s->eventfd, &value);
+		if (ret < 0)
+			return NULL;
+		s->cb(NULL, s->cb_arg);
+	}
+	return NULL;
+}
+
+static int
+qtest_intr_initialize(struct qtest_session *s)
+{
+	char buf[64];
+	int ret;
+
+	snprintf(buf, sizeof(buf), "irq_intercept_in ioapic\n");
+
+	if (pthread_mutex_lock(&s->qtest_session_lock) < 0)
+		rte_panic("Cannot lock mutex\n");
+
+	/* To enable interrupt, send "irq_intercept_in" message to QEMU */
+	ret = qtest_raw_send(s->qtest_socket, buf, strlen(buf));
+	if (ret < 0) {
+		pthread_mutex_unlock(&s->qtest_session_lock);
+		return -1;
+	}
+
+	/* just ignore QEMU response */
+	ret = qtest_raw_recv(s->msgfds.readfd, buf, sizeof(buf));
+	if (ret < 0) {
+		pthread_mutex_unlock(&s->qtest_session_lock);
+		return -1;
+	}
+
+	if (pthread_mutex_unlock(&s->qtest_session_lock) < 0)
+		rte_panic("Cannot lock mutex\n");
+
+	return 0;
+}
+
 static void
 qtest_event_send(struct qtest_session *s, char *buf)
 {
+	char interrupt_message[32];
 	int ret;
 
-	/* relay normal message to pipe */
-	ret = qtest_raw_send(s->msgfds.writefd, buf, strlen(buf));
-	if (ret < 0)
-		rte_panic("cannot relay normal message\n");
+	/* This message will come when interrupt occurs */
+	snprintf(interrupt_message, sizeof(interrupt_message),
+			"IRQ raise %d", s->irqno);
+
+	if (strncmp(buf, interrupt_message,
+				strlen(interrupt_message)) == 0) {
+		if (rte_atomic16_read(&s->enable_intr) == 0)
+			return;
+
+		/* relay interrupt to eventfd */
+		ret = eventfd_write(s->eventfd, 1);
+		if (ret < 0)
+			rte_panic("cannot relay interrupt\n");
+	} else {
+		/* relay normal message to pipe */
+		ret = qtest_raw_send(s->msgfds.writefd, buf, strlen(buf));
+		if (ret < 0)
+			rte_panic("cannot relay normal message\n");
+	}
 }
 
 static void
@@ -492,6 +627,7 @@ qtest_close_sockets(struct qtest_session *s)
 	qtest_close_one_socket(&s->qtest_socket);
 	qtest_close_one_socket(&s->msgfds.readfd);
 	qtest_close_one_socket(&s->msgfds.writefd);
+	qtest_close_one_socket(&s->eventfd);
 	qtest_close_one_socket(&s->ivshmem_socket);
 }
 
@@ -595,6 +731,57 @@ qtest_event_handler(void *data) {
 	return NULL;
 }
 
+/* This function should be fixed when multiple target devices are supported */
+int
+qtest_init_piix3_device(struct qtest_session *s, struct qtest_pci_device *dev)
+{
+	uint8_t bus, device, slot = 0;
+	struct qtest_pci_device *tmpdev;
+	uint8_t pcislot2regaddr[] = {	0xff,
+					0xff,
+					0xff,
+					PIIX3_REG_ADDR_PIRQC,
+					PIIX3_REG_ADDR_PIRQD,
+					PIIX3_REG_ADDR_PIRQA,
+					PIIX3_REG_ADDR_PIRQB};
+
+	bus = dev->bus_addr;
+	device = dev->device_addr;
+
+	PMD_DRV_LOG(INFO,
+		"Find %s on virtual PCI bus: %04x:%02x:00.0",
+		dev->name, bus, device);
+
+	/* Get slot id that is connected to target device(virtio-net device) */
+	TAILQ_FOREACH(tmpdev, &s->head, next) {
+		if (strcmp(tmpdev->name, "piix3") != 0 &&
+				strcmp(tmpdev->name, "ivshmem") != 0) {
+			slot = tmpdev->device_addr;
+			break;
+		}
+	}
+
+	if (slot == 0)
+		return -1;
+
+	/*
+	 * Set interrupt routing for target device.
+	 * Here is i440fx/piix3 connection settings
+	 * ---------------------------------------
+	 * PCI Slot3 -> PIRQC
+	 * PCI Slot4 -> PIRQD
+	 * PCI Slot5 -> PIRQA
+	 * PCI Slot6 -> PIRQB
+	 */
+	if (pcislot2regaddr[slot] != 0xff) {
+		qtest_pci_outb(s, bus, device, 0,
+				pcislot2regaddr[slot],
+				s->irqno);
+	}
+
+	return 0;
+}
+
 /*
  * Common initialization of PCI device.
  * To know detail, see pci specification.
@@ -1011,6 +1198,12 @@ qtest_vdev_uninit(struct qtest_session *s)
 		s->event_th_started = 0;
 	}
 
+	if (s->intr_th_started) {
+		pthread_cancel(s->intr_th);
+		pthread_join(s->intr_th, NULL);
+		s->intr_th_started = 0;
+	}
+
 	pthread_mutex_destroy(&s->qtest_session_lock);
 	qtest_remove_target_devices(s);
 	rte_free(s);
@@ -1018,7 +1211,7 @@ qtest_vdev_uninit(struct qtest_session *s)
 
 struct qtest_session *
 qtest_vdev_init(char *qtest_path, char *ivshmem_path,
-		struct qtest_pci_device *devices, int devnum)
+		int irqno, struct qtest_pci_device *devices, int devnum)
 {
 	struct qtest_session *s;
 	int ret;
@@ -1042,12 +1235,21 @@ qtest_vdev_init(char *qtest_path, char *ivshmem_path,
 		goto error;
 	}
 
+	s->eventfd = eventfd(0, 0);
+	if (s->eventfd < 0) {
+		PMD_DRV_LOG(ERR, "Failed to open eventfd");
+		goto error;
+	}
+
 	ret = qtest_register_target_devices(s, devices, devnum);
 	if (ret != 0) {
 		PMD_DRV_LOG(ERR, "Failed to initialize qtest session");
 		goto error;
 	}
 
+	rte_atomic16_set(&s->enable_intr, 0);
+	s->irqno = irqno;
+
 	s->ivshmem_socket = qtest_open_socket(ivshmem_path);
 	if (s->ivshmem_socket < 0) {
 		PMD_DRV_LOG(ERR, "Failed to open %s", ivshmem_path);
@@ -1067,6 +1269,19 @@ qtest_vdev_init(char *qtest_path, char *ivshmem_path,
 	}
 	s->event_th_started = 1;
 
+	ret = pthread_create(&s->intr_th, NULL, qtest_intr_handler, s);
+	if (ret != 0) {
+		PMD_DRV_LOG(ERR, "Failed to create interrupt handler");
+		goto error;
+	}
+	s->intr_th_started = 1;
+
+	ret = qtest_intr_initialize(s);
+	if (ret != 0) {
+		PMD_DRV_LOG(ERR, "Failed to initialize interrupt");
+		goto error;
+	}
+
 	ret = qtest_setup_shared_memory(s);
 	if (ret != 0) {
 		PMD_DRV_LOG(ERR, "Failed to setup shared memory");
diff --git a/drivers/net/virtio/virtio_qtest/qtest_utils.h b/drivers/net/virtio/virtio_qtest/qtest_utils.h
index e41374f..c1abc39 100644
--- a/drivers/net/virtio/virtio_qtest/qtest_utils.h
+++ b/drivers/net/virtio/virtio_qtest/qtest_utils.h
@@ -134,6 +134,8 @@ struct qtest_pci_device {
  *   Path of qtest socket.
  * @param ivshmem_path
  *   Path of ivshmem socket.
+ * @param irqno
+ *   Interrupt number of the target device(virtio-net device).
  * @param devices
  *   Array of device information. It should contain piix3, ivshmem and target
  *   device(virtio-net device).
@@ -143,7 +145,7 @@ struct qtest_pci_device {
  *   The pointer to qtest session structure.
  */
 struct qtest_session *qtest_vdev_init(char *qtest_path, char *ivshmem_path,
-		struct qtest_pci_device *devices, int devnum);
+		int irqno, struct qtest_pci_device *devices, int devnum);
 
 /**
  * @internal
@@ -156,6 +158,56 @@ void qtest_vdev_uninit(struct qtest_session *s);
 
 /**
  * @internal
+ * Register interrupt callback.
+ *
+ * @param s
+ *   The pointer to qtest session structure.
+ * @param cb
+ *   The pointer to callback.
+ * @param cb_arg
+ *   The pointer to callback argument.
+ */
+void qtest_intr_callback_register(struct qtest_session *s,
+		rte_intr_callback_fn cb, void *cb_arg);
+
+/**
+ * @internal
+ * Unregister interrupt callback.
+ *
+ * @param s
+ *   The pointer to qtest session structure.
+ * @param cb
+ *   The pointer to callback.
+ * @param cb_arg
+ *   The pointer to callback argument.
+ */
+void qtest_intr_callback_unregister(struct qtest_session *s,
+		rte_intr_callback_fn cb, void *cb_arg);
+
+/**
+ * @internal
+ * Enable interrupt callback.
+ *
+ * @param s
+ *   The pointer to qtest session structure.
+ * @return
+ *   0 on success, negative on error
+ */
+int qtest_intr_enable(struct qtest_session *s);
+
+/**
+ * @internal
+ * Disable interrupt callback.
+ *
+ * @param s
+ *   The pointer to qtest session structure.
+ * @return
+ *   0 on success, negative on error
+ */
+int qtest_intr_disable(struct qtest_session *s);
+
+/**
+ * @internal
  * Read a port of QEMU guest.
  *
  * @param s
@@ -274,6 +326,20 @@ int qtest_get_bar_size(struct qtest_session *s, const char *name,
 
 /**
  * @internal
+ * Initialization function of piix3 device.
+ *
+ * @param s
+ *   The pointer to qtest session structure.
+ * @param dev
+ *   The pointer of pci device.
+ * @return
+ *   0 on success, negative on error
+ */
+int qtest_init_piix3_device(struct qtest_session *s,
+			struct qtest_pci_device *dev);
+
+/**
+ * @internal
  * Initialization function of general device.
  *
  * @param s
diff --git a/drivers/net/virtio/virtio_qtest/virtio_qtest_dev.c b/drivers/net/virtio/virtio_qtest/virtio_qtest_dev.c
index dec38ff..78a87b5 100644
--- a/drivers/net/virtio/virtio_qtest/virtio_qtest_dev.c
+++ b/drivers/net/virtio/virtio_qtest/virtio_qtest_dev.c
@@ -51,12 +51,14 @@
 #define ETH_VIRTIO_NET_ARG_IVSHMEM_PATH         "ivshmem"
 #define ETH_VIRTIO_NET_ARG_VIRTIO_NET_ADDR      "virtio-net-addr"
 #define ETH_VIRTIO_NET_ARG_IVSHMEM_ADDR         "ivshmem-addr"
+#define ETH_VIRTIO_NET_ARG_PIIX3_ADDR           "piix3-addr"
 
 static const char *valid_qtest_args[] = {
 	ETH_VIRTIO_NET_ARG_QTEST_PATH,
 	ETH_VIRTIO_NET_ARG_IVSHMEM_PATH,
 	ETH_VIRTIO_NET_ARG_VIRTIO_NET_ADDR,
 	ETH_VIRTIO_NET_ARG_IVSHMEM_ADDR,
+	ETH_VIRTIO_NET_ARG_PIIX3_ADDR,
 	NULL
 };
 
@@ -197,7 +199,7 @@ static int
 virtio_prepare_target_devices(struct qtest_pci_device *devices,
 			struct rte_kvargs *kvlist)
 {
-	struct qtest_pci_device *virtio_net, *ivshmem;
+	struct qtest_pci_device *virtio_net, *ivshmem, *piix3;
 	struct rte_pci_addr default_addr;
 	const struct rte_memseg *ms;
 	int ret;
@@ -224,6 +226,7 @@ virtio_prepare_target_devices(struct qtest_pci_device *devices,
 
 	virtio_net = &devices[0];
 	ivshmem = &devices[1];
+	piix3 = &devices[2];
 
 	virtio_net->name = "virtio-net";
 	virtio_net->device_id = VIRTIO_NET_DEVICE_ID;
@@ -251,6 +254,12 @@ virtio_prepare_target_devices(struct qtest_pci_device *devices,
 	/* In host mode, only one memory segment is vaild */
 	ivshmem->bar[2].region_start = (uint64_t)ms[0].addr;
 
+	/* piix3 is needed to route irqs from virtio-net to ioapic */
+	piix3->name = "piix3";
+	piix3->device_id = PIIX3_DEVICE_ID;
+	piix3->vendor_id = PIIX3_VENDOR_ID;
+	piix3->init = qtest_init_piix3_device;
+
 	/*
 	 * Set pci addresses specified by command line.
 	 * QTest utils will only check specified pci address.
@@ -274,6 +283,13 @@ virtio_prepare_target_devices(struct qtest_pci_device *devices,
 	if (ret < 0)
 		return -1;
 
+	default_addr.devid = 1;
+	ret = virtio_net_eth_pmd_parse_pci_addr(kvlist,
+			ETH_VIRTIO_NET_ARG_PIIX3_ADDR,
+			&piix3->specified_addr, &default_addr);
+	if (ret < 0)
+		return -1;
+
 	return 0;
 }
 /*
@@ -318,7 +334,7 @@ rte_qtest_virtio_pmd_init(const char *name, const char *params)
 
 	hw = eth_dev->data->dev_private;
 	hw->virtio_user_dev = qtest_vdev_init(qtest_path, ivshmem_path,
-			devices, QTEST_DEVICE_NUM);
+			VIRTIO_NET_IRQ_NUM, devices, QTEST_DEVICE_NUM);
 	if (hw->virtio_user_dev == NULL)
 		goto error;
 
@@ -331,8 +347,9 @@ rte_qtest_virtio_pmd_init(const char *name, const char *params)
 	if (ret < 0)
 		goto error;
 
+	TAILQ_INIT(&eth_dev->link_intr_cbs);
+
 	eth_dev->driver = NULL;
-	eth_dev->data->dev_flags |= RTE_ETH_DEV_DETACHABLE;
 	eth_dev->data->kdrv = RTE_KDRV_NONE;
 	eth_dev->data->drv_name = QTEST_DRV_NAME;
 
diff --git a/drivers/net/virtio/virtio_qtest/virtio_qtest_pci.c b/drivers/net/virtio/virtio_qtest/virtio_qtest_pci.c
index d715b13..048cca2 100644
--- a/drivers/net/virtio/virtio_qtest/virtio_qtest_pci.c
+++ b/drivers/net/virtio/virtio_qtest/virtio_qtest_pci.c
@@ -58,6 +58,34 @@ check_vq_phys_addr_ok(struct virtqueue *vq)
 	return 1;
 }
 
+static int
+intr_cb_register(struct virtio_hw *hw,
+			rte_intr_callback_fn cb, void *cb_arg)
+{
+	qtest_intr_callback_register(hw->virtio_user_dev, cb, cb_arg);
+	return 0;
+}
+
+static int
+intr_cb_unregister(struct virtio_hw *hw,
+			rte_intr_callback_fn cb, void *cb_arg)
+{
+	qtest_intr_callback_register(hw->virtio_user_dev, cb, cb_arg);
+	return 0;
+}
+
+static int
+intr_enable(struct virtio_hw *hw)
+{
+	return qtest_intr_enable(hw->virtio_user_dev);
+}
+
+static int
+intr_disable(struct virtio_hw *hw)
+{
+	return qtest_intr_disable(hw->virtio_user_dev);
+}
+
 static inline uint8_t
 qtest_read8(struct virtio_hw *hw, uint8_t *addr)
 {
@@ -259,19 +287,23 @@ qtest_notify_queue(struct virtio_hw *hw __rte_unused, struct virtqueue *vq)
 }
 
 const struct virtio_pci_ops modern_qtest_ops = {
-	.read_dev_cfg	= qtest_read_dev_config,
-	.write_dev_cfg	= qtest_write_dev_config,
-	.reset		= qtest_reset,
-	.get_status	= qtest_get_status,
-	.set_status	= qtest_set_status,
-	.get_features	= qtest_get_features,
-	.set_features	= qtest_set_features,
-	.get_isr	= qtest_get_isr,
-	.set_config_irq	= qtest_set_config_irq,
-	.get_queue_num	= qtest_get_queue_num,
-	.setup_queue	= qtest_setup_queue,
-	.del_queue	= qtest_del_queue,
-	.notify_queue	= qtest_notify_queue,
+	.read_dev_cfg		= qtest_read_dev_config,
+	.write_dev_cfg		= qtest_write_dev_config,
+	.reset			= qtest_reset,
+	.get_status		= qtest_get_status,
+	.set_status		= qtest_set_status,
+	.get_features		= qtest_get_features,
+	.set_features		= qtest_set_features,
+	.get_isr		= qtest_get_isr,
+	.set_config_irq		= qtest_set_config_irq,
+	.get_queue_num		= qtest_get_queue_num,
+	.setup_queue		= qtest_setup_queue,
+	.del_queue		= qtest_del_queue,
+	.notify_queue		= qtest_notify_queue,
+	.intr_cb_register	= intr_cb_register,
+	.intr_cb_unregister	= intr_cb_unregister,
+	.intr_enable		= intr_enable,
+	.intr_disable		= intr_disable,
 };
 
 static void *
@@ -396,9 +428,9 @@ qtest_vtpci_init(struct virtio_hw *hw, uint32_t *dev_flags)
 	if (virtio_read_caps(hw) == 0) {
 		PMD_INIT_LOG(INFO, "modern virtio pci detected.");
 		hw->vtpci_ops = &modern_qtest_ops;
-		hw->modern    = 1;
-		/* So far, we don't support LSC interrupt */
-		*dev_flags = 0;
+		hw->use_msix = 0;
+		hw->modern = 1;
+		*dev_flags |= RTE_ETH_DEV_INTR_LSC;
 		return 0;
 	}
 
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 120+ messages in thread

* Re: [PATCH v5 0/6] Virtio-net PMD: QEMU QTest extension for container
  2016-06-02  3:29           ` [PATCH v5 0/6] Virtio-net PMD: QEMU QTest extension for container Tetsuya Mukawa
@ 2016-06-02  7:31             ` Yuanhan Liu
  2016-06-02  9:30               ` Tetsuya Mukawa
  0 siblings, 1 reply; 120+ messages in thread
From: Yuanhan Liu @ 2016-06-02  7:31 UTC (permalink / raw)
  To: Tetsuya Mukawa
  Cc: dev, jianfeng.tan, huawei.xie, Thomas Monjalon, David Marchand

On Thu, Jun 02, 2016 at 12:29:39PM +0900, Tetsuya Mukawa wrote:
> The patches will work on below patch series.
>  - [PATCH v5 0/8] virtio support for container
> 
> It seems his implementation will be changed a bit.
> So, this patch series are also going to be changed to follow his implementation.

Hi Tetsuya,

TBH, I was considering to reject your v4: the code was quite messy. But
this v5 changed my mind a bit: it's much cleaner.

But still, I'd ask do we really need 2 virtio for container solutions?

That results to the same question that I'm sure you have already
answered before: in which way your solution outweighs Jianfeng's?

The reason I want to ask again is: 1), I wasn't actively participating
the discussion in last release, besides some common comments on virtio,
2), maybe it's time to make a decision that should we take one solution
only, if so, which one, or should we take both?

Thomas is Cc'ed, hope he can help on the decision making.

	--yliu

^ permalink raw reply	[flat|nested] 120+ messages in thread

* Re: [PATCH v5 0/6] Virtio-net PMD: QEMU QTest extension for container
  2016-06-02  7:31             ` Yuanhan Liu
@ 2016-06-02  9:30               ` Tetsuya Mukawa
  2016-06-03  4:17                 ` Yuanhan Liu
  0 siblings, 1 reply; 120+ messages in thread
From: Tetsuya Mukawa @ 2016-06-02  9:30 UTC (permalink / raw)
  To: Yuanhan Liu
  Cc: dev, jianfeng.tan, huawei.xie, Thomas Monjalon, David Marchand

Hi Yuanhan,

On 2016/06/02 16:31, Yuanhan Liu wrote:
> But still, I'd ask do we really need 2 virtio for container solutions?

I appreciate your comments.
Let me have time to discuss it with our team.

Thanks,
Tetsuya

^ permalink raw reply	[flat|nested] 120+ messages in thread

* Re: [PATCH v5 0/6] Virtio-net PMD: QEMU QTest extension for container
  2016-06-02  9:30               ` Tetsuya Mukawa
@ 2016-06-03  4:17                 ` Yuanhan Liu
  2016-06-03 13:51                   ` Thomas Monjalon
  2016-06-06  5:10                   ` Tetsuya Mukawa
  0 siblings, 2 replies; 120+ messages in thread
From: Yuanhan Liu @ 2016-06-03  4:17 UTC (permalink / raw)
  To: Tetsuya Mukawa
  Cc: dev, jianfeng.tan, huawei.xie, Thomas Monjalon, David Marchand

On Thu, Jun 02, 2016 at 06:30:18PM +0900, Tetsuya Mukawa wrote:
> Hi Yuanhan,
> 
> On 2016/06/02 16:31, Yuanhan Liu wrote:
> > But still, I'd ask do we really need 2 virtio for container solutions?
> 
> I appreciate your comments.

No, I appreciate your effort for contributing to DPDK! vhost-pmd stuff
is just brilliant!

> Let me have time to discuss it with our team.

I'm wondering could we have one solution only. IMO, the drawback of
having two (quite different) solutions might outweighs the benefit
it takes. Say, it might just confuse user.

OTOH, I'm wondering could you adapt to Jianfeng's solution? If not,
what's the missing parts, and could we fix it? I'm thinking having
one unified solution will keep ours energy/focus on one thing, making
it better and better! Having two just splits the energy; it also
introduces extra burden for maintaining.

	--yliu

^ permalink raw reply	[flat|nested] 120+ messages in thread

* Re: [PATCH v5 0/6] Virtio-net PMD: QEMU QTest extension for container
  2016-06-03  4:17                 ` Yuanhan Liu
@ 2016-06-03 13:51                   ` Thomas Monjalon
  2016-06-06  5:10                   ` Tetsuya Mukawa
  1 sibling, 0 replies; 120+ messages in thread
From: Thomas Monjalon @ 2016-06-03 13:51 UTC (permalink / raw)
  To: Yuanhan Liu; +Cc: Tetsuya Mukawa, dev, jianfeng.tan, huawei.xie, David Marchand

2016-06-03 12:17, Yuanhan Liu:
> On Thu, Jun 02, 2016 at 06:30:18PM +0900, Tetsuya Mukawa wrote:
> > Hi Yuanhan,
> > 
> > On 2016/06/02 16:31, Yuanhan Liu wrote:
> > > But still, I'd ask do we really need 2 virtio for container solutions?
> > 
> > I appreciate your comments.
> 
> No, I appreciate your effort for contributing to DPDK! vhost-pmd stuff
> is just brilliant!
> 
> > Let me have time to discuss it with our team.
> 
> I'm wondering could we have one solution only. IMO, the drawback of
> having two (quite different) solutions might outweighs the benefit
> it takes. Say, it might just confuse user.

+1

> OTOH, I'm wondering could you adapt to Jianfeng's solution? If not,
> what's the missing parts, and could we fix it? I'm thinking having
> one unified solution will keep ours energy/focus on one thing, making
> it better and better! Having two just splits the energy; it also
> introduces extra burden for maintaining.

+1

^ permalink raw reply	[flat|nested] 120+ messages in thread

* Re: [PATCH v5 0/6] Virtio-net PMD: QEMU QTest extension for container
  2016-06-03  4:17                 ` Yuanhan Liu
  2016-06-03 13:51                   ` Thomas Monjalon
@ 2016-06-06  5:10                   ` Tetsuya Mukawa
  2016-06-06  7:21                     ` Yuanhan Liu
  2016-06-06  8:03                     ` Tan, Jianfeng
  1 sibling, 2 replies; 120+ messages in thread
From: Tetsuya Mukawa @ 2016-06-06  5:10 UTC (permalink / raw)
  To: Yuanhan Liu
  Cc: dev, jianfeng.tan, huawei.xie, Thomas Monjalon, David Marchand,
	nakajima.yoshihiro

Hi Yuanhan,

Sorry for late replying.

On 2016/06/03 13:17, Yuanhan Liu wrote:
> On Thu, Jun 02, 2016 at 06:30:18PM +0900, Tetsuya Mukawa wrote:
>> Hi Yuanhan,
>>
>> On 2016/06/02 16:31, Yuanhan Liu wrote:
>>> But still, I'd ask do we really need 2 virtio for container solutions?
>>
>> I appreciate your comments.
> 
> No, I appreciate your effort for contributing to DPDK! vhost-pmd stuff
> is just brilliant!
> 
>> Let me have time to discuss it with our team.
> 
> I'm wondering could we have one solution only. IMO, the drawback of
> having two (quite different) solutions might outweighs the benefit
> it takes. Say, it might just confuse user.

I agree with this.
If we have 2 solutions, it would confuse the DPDK users.

> 
> OTOH, I'm wondering could you adapt to Jianfeng's solution? If not,
> what's the missing parts, and could we fix it? I'm thinking having
> one unified solution will keep ours energy/focus on one thing, making
> it better and better! Having two just splits the energy; it also
> introduces extra burden for maintaining.

Of course, I adopt Jiangeng's solution basically.
Actually, his solution is almost similar I tried to implement at first.

I guess here is pros/cons of 2 solutions.

[Jianfeng's solution]
- Pros
Don't need to invoke QEMU process.
- Cons
If virtio-net specification is changed, we need to implement it by
ourselves. Also, LSC interrupt and control queue functions are not
supported yet.
I agree both functions may not be so important, and if we need it
we can implement them, but we need to pay energy to implement them.

[My solution]
- Pros
Basic principle of my implementation is not to reinvent the wheel.
We can use a virtio-net device of QEMU implementation, it means we don't
need to maintain virtio-net device by ourselves, and we can use all of
functions supported by QEMU virtio-net device.
- Cons
Need to invoke QEMU process.


Anyway, we can choose one of belows.
1. Take advantage of invoking less processes.
2. Take advantage of maintainability of virtio-net device.

Honestly, I'm OK if my solution is not merged.
Thus, it should be decided to let DPDK better.

What do you think?
Which is better for DPDK?

Thanks,
Tetsuya

> 
> 	--yliu
> 

^ permalink raw reply	[flat|nested] 120+ messages in thread

* Re: [PATCH v5 0/6] Virtio-net PMD: QEMU QTest extension for container
  2016-06-06  5:10                   ` Tetsuya Mukawa
@ 2016-06-06  7:21                     ` Yuanhan Liu
  2016-06-06  8:33                       ` Tetsuya Mukawa
  2016-06-06  8:03                     ` Tan, Jianfeng
  1 sibling, 1 reply; 120+ messages in thread
From: Yuanhan Liu @ 2016-06-06  7:21 UTC (permalink / raw)
  To: Tetsuya Mukawa
  Cc: dev, jianfeng.tan, huawei.xie, Thomas Monjalon, David Marchand,
	nakajima.yoshihiro

On Mon, Jun 06, 2016 at 02:10:46PM +0900, Tetsuya Mukawa wrote:
> Hi Yuanhan,
> 
> Sorry for late replying.

Never mind.

> 
> On 2016/06/03 13:17, Yuanhan Liu wrote:
> > On Thu, Jun 02, 2016 at 06:30:18PM +0900, Tetsuya Mukawa wrote:
> >> Hi Yuanhan,
> >>
> >> On 2016/06/02 16:31, Yuanhan Liu wrote:
> >>> But still, I'd ask do we really need 2 virtio for container solutions?
> >>
> >> I appreciate your comments.
> > 
> > No, I appreciate your effort for contributing to DPDK! vhost-pmd stuff
> > is just brilliant!
> > 
> >> Let me have time to discuss it with our team.
> > 
> > I'm wondering could we have one solution only. IMO, the drawback of
> > having two (quite different) solutions might outweighs the benefit
> > it takes. Say, it might just confuse user.
> 
> I agree with this.
> If we have 2 solutions, it would confuse the DPDK users.
> 
> > 
> > OTOH, I'm wondering could you adapt to Jianfeng's solution? If not,
> > what's the missing parts, and could we fix it? I'm thinking having
> > one unified solution will keep ours energy/focus on one thing, making
> > it better and better! Having two just splits the energy; it also
> > introduces extra burden for maintaining.
> 
> Of course, I adopt Jiangeng's solution basically.
> Actually, his solution is almost similar I tried to implement at first.
> 
> I guess here is pros/cons of 2 solutions.
> 
> [Jianfeng's solution]
> - Pros
> Don't need to invoke QEMU process.
> - Cons
> If virtio-net specification is changed, we need to implement it by
> ourselves. Also, LSC interrupt and control queue functions are not
> supported yet.

Jianfeng have made and sent out the patch to enable ctrl queue and
multiple queue support.

For the LSC part, no much idea yet so far. But I'm assuming it will
not take too much effort, either.

> I agree both functions may not be so important, and if we need it
> we can implement them, but we need to pay energy to implement them.
> 
> [My solution]
> - Pros
> Basic principle of my implementation is not to reinvent the wheel.

Yes, that's a good point. However, it's not that hard as we would have
thought in the first time: the tough part that dequeue/enqueue packets
from/to vring is actually offloaded to DPDK vhost-user. That means we
only need re-implement the control path of virtio-net device, plus the
vhost-user frontend. If you have a detailed look of your patchset as
well Jianfeng's, you might find that the two patchset are actually with
same code size. 

> We can use a virtio-net device of QEMU implementation, it means we don't
> need to maintain virtio-net device by ourselves, and we can use all of
> functions supported by QEMU virtio-net device.
> - Cons
> Need to invoke QEMU process.

Another thing is that it makes the usage a bit harder: look at the
long qemu cli options of your example usage. It also has some traps,
say, "--enable-kvm" is not allowed, which is a default option used
with QEMU.

And judging that we actually don't take too much effort to implement
a virtio device emulation, I'd prefer it slightly. I guess something
light weight and easier for use is more important here.

Actually, I have foreseen another benefit of adding virtio-user device
emulation: we now might be able to add a rte_vhost_dequeue/enqueue_burst()
unit test case. We simply can't do it before, since we depend on QEMU
for testing, which is not acceptable for a unit test case. Making it
be a unit test case would help us spotting any bad changes that would
introduce bugs easily and automatically.

	--yliu

> Anyway, we can choose one of belows.
> 1. Take advantage of invoking less processes.
> 2. Take advantage of maintainability of virtio-net device.
> 
> Honestly, I'm OK if my solution is not merged.
> Thus, it should be decided to let DPDK better.
> 
> What do you think?
> Which is better for DPDK?
> 
> Thanks,
> Tetsuya
> 
> > 
> > 	--yliu
> > 

^ permalink raw reply	[flat|nested] 120+ messages in thread

* Re: [PATCH v5 0/6] Virtio-net PMD: QEMU QTest extension for container
  2016-06-06  5:10                   ` Tetsuya Mukawa
  2016-06-06  7:21                     ` Yuanhan Liu
@ 2016-06-06  8:03                     ` Tan, Jianfeng
  2016-06-06  9:28                       ` Tetsuya Mukawa
  1 sibling, 1 reply; 120+ messages in thread
From: Tan, Jianfeng @ 2016-06-06  8:03 UTC (permalink / raw)
  To: Tetsuya Mukawa, Yuanhan Liu
  Cc: dev, huawei.xie, Thomas Monjalon, David Marchand, nakajima.yoshihiro

Hi,


On 6/6/2016 1:10 PM, Tetsuya Mukawa wrote:
> Hi Yuanhan,
>
> Sorry for late replying.
>
> On 2016/06/03 13:17, Yuanhan Liu wrote:
>> On Thu, Jun 02, 2016 at 06:30:18PM +0900, Tetsuya Mukawa wrote:
>>> Hi Yuanhan,
>>>
>>> On 2016/06/02 16:31, Yuanhan Liu wrote:
>>>> But still, I'd ask do we really need 2 virtio for container solutions?
>>> I appreciate your comments.
>> No, I appreciate your effort for contributing to DPDK! vhost-pmd stuff
>> is just brilliant!
>>
>>> Let me have time to discuss it with our team.
>> I'm wondering could we have one solution only. IMO, the drawback of
>> having two (quite different) solutions might outweighs the benefit
>> it takes. Say, it might just confuse user.
> I agree with this.
> If we have 2 solutions, it would confuse the DPDK users.
>
>> OTOH, I'm wondering could you adapt to Jianfeng's solution? If not,
>> what's the missing parts, and could we fix it? I'm thinking having
>> one unified solution will keep ours energy/focus on one thing, making
>> it better and better! Having two just splits the energy; it also
>> introduces extra burden for maintaining.
> Of course, I adopt Jiangeng's solution basically.
> Actually, his solution is almost similar I tried to implement at first.
>
> I guess here is pros/cons of 2 solutions.
>
> [Jianfeng's solution]
> - Pros
> Don't need to invoke QEMU process.
> - Cons
> If virtio-net specification is changed, we need to implement it by
> ourselves.

It will barely introduce any change when virtio-net specification is 
changed as far as I can see. The only part we care is the how desc, 
avail, used distribute on memory, which is a very small part.

It's true that my solution now seriously depend on vhost-user protocol, 
which is defined in QEMU. I cannot see a big problem there so far.

>   Also, LSC interrupt and control queue functions are not
> supported yet.
> I agree both functions may not be so important, and if we need it
> we can implement them, but we need to pay energy to implement them.

LSC is really less important than rxq interrupt (IMO). We don't know how 
long will rxq interrupt of virtio be available for QEMU, but we can 
accelerate it if we avoid using QEMU.

Actually, if the vhost backend is vhost-user (the main use case), 
current qemu have limited control queue support, because it needs the 
support from the vhost user backend.

Add one more con of my solution:
- Need to write another logic to support other virtio device (say 
virtio-scsi), if it's easier of Tetsuya's solution to do that?

>
> [My solution]
> - Pros
> Basic principle of my implementation is not to reinvent the wheel.
> We can use a virtio-net device of QEMU implementation, it means we don't
> need to maintain virtio-net device by ourselves, and we can use all of
> functions supported by QEMU virtio-net device.
> - Cons
> Need to invoke QEMU process.

Two more possible cons:
a) This solution also needs to maintain qtest utility, right?
b) There's still address arrange restriction, right? Although we can use 
"--base-virtaddr=0x400000000" to relieve this question, but how about if 
there are 2 or more devices? (By the way, is there still address arrange 
requirement for 32 bit system)
c) Actually, IMO this solution is sensitive to any virtio spec change 
(io port, pci configuration space).

>
>
> Anyway, we can choose one of belows.
> 1. Take advantage of invoking less processes.
> 2. Take advantage of maintainability of virtio-net device.
>
> Honestly, I'm OK if my solution is not merged.
> Thus, it should be decided to let DPDK better.

Yes, agreed.

Thanks,
Jianfeng

>
> What do you think?
> Which is better for DPDK?
>
> Thanks,
> Tetsuya
>
>> 	--yliu
>>

^ permalink raw reply	[flat|nested] 120+ messages in thread

* Re: [PATCH v5 0/6] Virtio-net PMD: QEMU QTest extension for container
  2016-06-06  7:21                     ` Yuanhan Liu
@ 2016-06-06  8:33                       ` Tetsuya Mukawa
  2016-06-06  8:49                         ` Yuanhan Liu
  0 siblings, 1 reply; 120+ messages in thread
From: Tetsuya Mukawa @ 2016-06-06  8:33 UTC (permalink / raw)
  To: Yuanhan Liu
  Cc: dev, jianfeng.tan, huawei.xie, Thomas Monjalon, David Marchand,
	nakajima.yoshihiro

On 2016/06/06 16:21, Yuanhan Liu wrote:
> On Mon, Jun 06, 2016 at 02:10:46PM +0900, Tetsuya Mukawa wrote:
>> Hi Yuanhan,
>>
>> Sorry for late replying.
> 
> Never mind.
> 
>>
>> On 2016/06/03 13:17, Yuanhan Liu wrote:
>>> On Thu, Jun 02, 2016 at 06:30:18PM +0900, Tetsuya Mukawa wrote:
>>>> Hi Yuanhan,
>>>>
>>>> On 2016/06/02 16:31, Yuanhan Liu wrote:
>>>>> But still, I'd ask do we really need 2 virtio for container solutions?
>>>>
>>>> I appreciate your comments.
>>>
>>> No, I appreciate your effort for contributing to DPDK! vhost-pmd stuff
>>> is just brilliant!
>>>
>>>> Let me have time to discuss it with our team.
>>>
>>> I'm wondering could we have one solution only. IMO, the drawback of
>>> having two (quite different) solutions might outweighs the benefit
>>> it takes. Say, it might just confuse user.
>>
>> I agree with this.
>> If we have 2 solutions, it would confuse the DPDK users.
>>
>>>
>>> OTOH, I'm wondering could you adapt to Jianfeng's solution? If not,
>>> what's the missing parts, and could we fix it? I'm thinking having
>>> one unified solution will keep ours energy/focus on one thing, making
>>> it better and better! Having two just splits the energy; it also
>>> introduces extra burden for maintaining.
>>
>> Of course, I adopt Jiangeng's solution basically.
>> Actually, his solution is almost similar I tried to implement at first.
>>
>> I guess here is pros/cons of 2 solutions.
>>
>> [Jianfeng's solution]
>> - Pros
>> Don't need to invoke QEMU process.
>> - Cons
>> If virtio-net specification is changed, we need to implement it by
>> ourselves. Also, LSC interrupt and control queue functions are not
>> supported yet.
> 
> Jianfeng have made and sent out the patch to enable ctrl queue and
> multiple queue support.

Sorry, I haven't noticed that ctrl queue has been already enabled.

> 
> For the LSC part, no much idea yet so far. But I'm assuming it will
> not take too much effort, either.
> 
>> I agree both functions may not be so important, and if we need it
>> we can implement them, but we need to pay energy to implement them.
>>
>> [My solution]
>> - Pros
>> Basic principle of my implementation is not to reinvent the wheel.
> 
> Yes, that's a good point. However, it's not that hard as we would have
> thought in the first time: the tough part that dequeue/enqueue packets
> from/to vring is actually offloaded to DPDK vhost-user. That means we
> only need re-implement the control path of virtio-net device, plus the
> vhost-user frontend. If you have a detailed look of your patchset as
> well Jianfeng's, you might find that the two patchset are actually with
> same code size. 

Yes, I know this.
So far, the amount of code is almost same, but in the future we may need
to implement more, if virtio-net specification is revised.

> 
>> We can use a virtio-net device of QEMU implementation, it means we don't
>> need to maintain virtio-net device by ourselves, and we can use all of
>> functions supported by QEMU virtio-net device.
>> - Cons
>> Need to invoke QEMU process.
> 
> Another thing is that it makes the usage a bit harder: look at the
> long qemu cli options of your example usage. It also has some traps,
> say, "--enable-kvm" is not allowed, which is a default option used
> with QEMU.

Probably a kind of shell script will help the users.

> 
> And judging that we actually don't take too much effort to implement
> a virtio device emulation, I'd prefer it slightly. I guess something
> light weight and easier for use is more important here.

This is very important point.
If so, we don't need much effort when virtio-spec is changed.

> 
> Actually, I have foreseen another benefit of adding virtio-user device
> emulation: we now might be able to add a rte_vhost_dequeue/enqueue_burst()
> unit test case. We simply can't do it before, since we depend on QEMU
> for testing, which is not acceptable for a unit test case. Making it
> be a unit test case would help us spotting any bad changes that would
> introduce bugs easily and automatically.

As you mentioned above, QEMU process is not related with
dequeuing/enqueuing.
So I guess we may have a testing for rte_vhost_dequeue/enqueue_burst()
regardless of choice.

>> Anyway, we can choose one of belows.
>> 1. Take advantage of invoking less processes.
>> 2. Take advantage of maintainability of virtio-net device.

If container usage that DPDK assumes is to invoke hundreds containers in
one host, we should take Jiangfeng's solution.

Also, if implementing a new feature and maintaining Jiangfeng's
virtio-net device are not so hard, we should take his solution.

I guess this is the point we need to consider.
What do you think?

Thanks,
Tetsuya

>>
>> Honestly, I'm OK if my solution is not merged.
>> Thus, it should be decided to let DPDK better.
>>
>> What do you think?
>> Which is better for DPDK?
>>
>> Thanks,
>> Tetsuya
>>
>>>
>>> 	--yliu
>>>

^ permalink raw reply	[flat|nested] 120+ messages in thread

* Re: [PATCH v5 0/6] Virtio-net PMD: QEMU QTest extension for container
  2016-06-06  8:33                       ` Tetsuya Mukawa
@ 2016-06-06  8:49                         ` Yuanhan Liu
  2016-06-06  9:30                           ` Tetsuya Mukawa
  0 siblings, 1 reply; 120+ messages in thread
From: Yuanhan Liu @ 2016-06-06  8:49 UTC (permalink / raw)
  To: Tetsuya Mukawa
  Cc: dev, jianfeng.tan, huawei.xie, Thomas Monjalon, David Marchand,
	nakajima.yoshihiro

On Mon, Jun 06, 2016 at 05:33:31PM +0900, Tetsuya Mukawa wrote:
> >> [My solution]
> >> - Pros
> >> Basic principle of my implementation is not to reinvent the wheel.
> > 
> > Yes, that's a good point. However, it's not that hard as we would have
> > thought in the first time: the tough part that dequeue/enqueue packets
> > from/to vring is actually offloaded to DPDK vhost-user. That means we
> > only need re-implement the control path of virtio-net device, plus the
> > vhost-user frontend. If you have a detailed look of your patchset as
> > well Jianfeng's, you might find that the two patchset are actually with
> > same code size. 
> 
> Yes, I know this.
> So far, the amount of code is almost same, but in the future we may need
> to implement more, if virtio-net specification is revised.

It didn't take too much effort to implement from scratch, I doubt it
will for future revise. And, virtio-net spec is unlikely revised, or
to be precisely, unlikely revised quite often. Therefore, I don't see
big issues here.

> >> We can use a virtio-net device of QEMU implementation, it means we don't
> >> need to maintain virtio-net device by ourselves, and we can use all of
> >> functions supported by QEMU virtio-net device.
> >> - Cons
> >> Need to invoke QEMU process.
> > 
> > Another thing is that it makes the usage a bit harder: look at the
> > long qemu cli options of your example usage. It also has some traps,
> > say, "--enable-kvm" is not allowed, which is a default option used
> > with QEMU.
> 
> Probably a kind of shell script will help the users.

Yeah, that would help. But if we have a choice to make it simpler in the
beginning, why not then? :-)

> > 
> > And judging that we actually don't take too much effort to implement
> > a virtio device emulation, I'd prefer it slightly. I guess something
> > light weight and easier for use is more important here.
> 
> This is very important point.
> If so, we don't need much effort when virtio-spec is changed.

I'd assume so.

> > Actually, I have foreseen another benefit of adding virtio-user device
> > emulation: we now might be able to add a rte_vhost_dequeue/enqueue_burst()
> > unit test case. We simply can't do it before, since we depend on QEMU
> > for testing, which is not acceptable for a unit test case. Making it
> > be a unit test case would help us spotting any bad changes that would
> > introduce bugs easily and automatically.
> 
> As you mentioned above, QEMU process is not related with
> dequeuing/enqueuing.
> So I guess we may have a testing for rte_vhost_dequeue/enqueue_burst()
> regardless of choice.

Yes, we don't need the dequeue/enqueue part, but we need the vhost-user
initialization part from QEMU vhost-user. Now that we have vhost-user
frontend from virtio-user, we have no dependency on QEMU any more.

> >> Anyway, we can choose one of belows.
> >> 1. Take advantage of invoking less processes.
> >> 2. Take advantage of maintainability of virtio-net device.
> 
> If container usage that DPDK assumes is to invoke hundreds containers in
> one host,

I barely know about container, but I would assume that's not rare.

> we should take Jiangfeng's solution.
> 
> Also, if implementing a new feature and maintaining Jiangfeng's
> virtio-net device are not so hard,

As stated, I would assume so.

	--yliu

> we should take his solution.
> 
> I guess this is the point we need to consider.
> What do you think?
> 
> Thanks,
> Tetsuya
> 
> >>
> >> Honestly, I'm OK if my solution is not merged.
> >> Thus, it should be decided to let DPDK better.
> >>
> >> What do you think?
> >> Which is better for DPDK?
> >>
> >> Thanks,
> >> Tetsuya
> >>
> >>>
> >>> 	--yliu
> >>>

^ permalink raw reply	[flat|nested] 120+ messages in thread

* Re: [PATCH v5 0/6] Virtio-net PMD: QEMU QTest extension for container
  2016-06-06  8:03                     ` Tan, Jianfeng
@ 2016-06-06  9:28                       ` Tetsuya Mukawa
  2016-06-06 10:35                         ` Tan, Jianfeng
  0 siblings, 1 reply; 120+ messages in thread
From: Tetsuya Mukawa @ 2016-06-06  9:28 UTC (permalink / raw)
  To: Tan, Jianfeng, Yuanhan Liu
  Cc: dev, huawei.xie, Thomas Monjalon, David Marchand, nakajima.yoshihiro

On 2016/06/06 17:03, Tan, Jianfeng wrote:
> Hi,
> 
> 
> On 6/6/2016 1:10 PM, Tetsuya Mukawa wrote:
>> Hi Yuanhan,
>>
>> Sorry for late replying.
>>
>> On 2016/06/03 13:17, Yuanhan Liu wrote:
>>> On Thu, Jun 02, 2016 at 06:30:18PM +0900, Tetsuya Mukawa wrote:
>>>> Hi Yuanhan,
>>>>
>>>> On 2016/06/02 16:31, Yuanhan Liu wrote:
>>>>> But still, I'd ask do we really need 2 virtio for container solutions?
>>>> I appreciate your comments.
>>> No, I appreciate your effort for contributing to DPDK! vhost-pmd stuff
>>> is just brilliant!
>>>
>>>> Let me have time to discuss it with our team.
>>> I'm wondering could we have one solution only. IMO, the drawback of
>>> having two (quite different) solutions might outweighs the benefit
>>> it takes. Say, it might just confuse user.
>> I agree with this.
>> If we have 2 solutions, it would confuse the DPDK users.
>>
>>> OTOH, I'm wondering could you adapt to Jianfeng's solution? If not,
>>> what's the missing parts, and could we fix it? I'm thinking having
>>> one unified solution will keep ours energy/focus on one thing, making
>>> it better and better! Having two just splits the energy; it also
>>> introduces extra burden for maintaining.
>> Of course, I adopt Jiangeng's solution basically.
>> Actually, his solution is almost similar I tried to implement at first.
>>
>> I guess here is pros/cons of 2 solutions.
>>
>> [Jianfeng's solution]
>> - Pros
>> Don't need to invoke QEMU process.
>> - Cons
>> If virtio-net specification is changed, we need to implement it by
>> ourselves.
> 
> It will barely introduce any change when virtio-net specification is
> changed as far as I can see. The only part we care is the how desc,
> avail, used distribute on memory, which is a very small part.

It's a good news, because we don't pay much effort to follow latest
virtio-net specification.

> 
> It's true that my solution now seriously depend on vhost-user protocol,
> which is defined in QEMU. I cannot see a big problem there so far.
> 
>>   Also, LSC interrupt and control queue functions are not
>> supported yet.
>> I agree both functions may not be so important, and if we need it
>> we can implement them, but we need to pay energy to implement them.
> 
> LSC is really less important than rxq interrupt (IMO). We don't know how
> long will rxq interrupt of virtio be available for QEMU, but we can
> accelerate it if we avoid using QEMU.
> 
> Actually, if the vhost backend is vhost-user (the main use case),
> current qemu have limited control queue support, because it needs the
> support from the vhost user backend.
> 
> Add one more con of my solution:
> - Need to write another logic to support other virtio device (say
> virtio-scsi), if it's easier of Tetsuya's solution to do that?
> 

Probably, my solution will be easier to do that.
My solution has enough facility to access to io port and PCI
configuration space of virtio-scsi device of QEMU.
So, if you invoke with QEMU with virtio-scsi, only you need to do is
changing PCI interface of current virtio-scsi PMD.
(I just assume currently we have virtio-scsi PMD.)
If the virtio-scsi PMD works on QEMU, same code should work with only
changing PCI interface.

>>
>> [My solution]
>> - Pros
>> Basic principle of my implementation is not to reinvent the wheel.
>> We can use a virtio-net device of QEMU implementation, it means we don't
>> need to maintain virtio-net device by ourselves, and we can use all of
>> functions supported by QEMU virtio-net device.
>> - Cons
>> Need to invoke QEMU process.
> 
> Two more possible cons:
> a) This solution also needs to maintain qtest utility, right?

But the spec of qtest will be more stable than virtio-net.

> b) There's still address arrange restriction, right? Although we can use
> "--base-virtaddr=0x400000000" to relieve this question, but how about if
> there are 2 or more devices? (By the way, is there still address arrange
> requirement for 32 bit system)

Our solutions are a virtio-net driver, and a vhost-user backend driver
needs to access to memory allocated by virtio-net driver.
If an application has 2 devices, it means 2 vhost-user backend PMD needs
to access to the same application memory, right?
Also, currently each virtio-net device has an one QEMU process.
So, I am not sure what will be problem if we have 2 devices.

BTW, 44bits limitations comes from current QEMU implementation itself.
(Actually, if modern virtio device is used, we should be able to remove
the restriction.)

> c) Actually, IMO this solution is sensitive to any virtio spec change
> (io port, pci configuration space).

In this case, virtio-net PMD itself will need to be fixed.
Then, my implementation will be also fixed with the same way.
Current implementation has only PCI abstraction that Yuanhan introduced,
so you may think my solution depends on above things, but actually, my
implementation depends on only how to access to io port and PCI
configuration space. This is what "qtest.h" provides.

Thanks,
Tetsuya

> 
>>
>>
>> Anyway, we can choose one of belows.
>> 1. Take advantage of invoking less processes.
>> 2. Take advantage of maintainability of virtio-net device.
>>
>> Honestly, I'm OK if my solution is not merged.
>> Thus, it should be decided to let DPDK better.
> 
> Yes, agreed.
> 
> Thanks,
> Jianfeng
> 
>>
>> What do you think?
>> Which is better for DPDK?
>>
>> Thanks,
>> Tetsuya
>>
>>>     --yliu
>>>
> 

^ permalink raw reply	[flat|nested] 120+ messages in thread

* Re: [PATCH v5 0/6] Virtio-net PMD: QEMU QTest extension for container
  2016-06-06  8:49                         ` Yuanhan Liu
@ 2016-06-06  9:30                           ` Tetsuya Mukawa
  2016-06-06  9:58                             ` Yuanhan Liu
  2016-06-06 10:50                             ` Tan, Jianfeng
  0 siblings, 2 replies; 120+ messages in thread
From: Tetsuya Mukawa @ 2016-06-06  9:30 UTC (permalink / raw)
  To: Yuanhan Liu
  Cc: dev, jianfeng.tan, huawei.xie, Thomas Monjalon, David Marchand,
	nakajima.yoshihiro

On 2016/06/06 17:49, Yuanhan Liu wrote:
> On Mon, Jun 06, 2016 at 05:33:31PM +0900, Tetsuya Mukawa wrote:
>>>> [My solution]
>>>> - Pros
>>>> Basic principle of my implementation is not to reinvent the wheel.
>>>
>>> Yes, that's a good point. However, it's not that hard as we would have
>>> thought in the first time: the tough part that dequeue/enqueue packets
>>> from/to vring is actually offloaded to DPDK vhost-user. That means we
>>> only need re-implement the control path of virtio-net device, plus the
>>> vhost-user frontend. If you have a detailed look of your patchset as
>>> well Jianfeng's, you might find that the two patchset are actually with
>>> same code size. 
>>
>> Yes, I know this.
>> So far, the amount of code is almost same, but in the future we may need
>> to implement more, if virtio-net specification is revised.
> 
> It didn't take too much effort to implement from scratch, I doubt it
> will for future revise. And, virtio-net spec is unlikely revised, or
> to be precisely, unlikely revised quite often. Therefore, I don't see
> big issues here.
> 
>>>> We can use a virtio-net device of QEMU implementation, it means we don't
>>>> need to maintain virtio-net device by ourselves, and we can use all of
>>>> functions supported by QEMU virtio-net device.
>>>> - Cons
>>>> Need to invoke QEMU process.
>>>
>>> Another thing is that it makes the usage a bit harder: look at the
>>> long qemu cli options of your example usage. It also has some traps,
>>> say, "--enable-kvm" is not allowed, which is a default option used
>>> with QEMU.
>>
>> Probably a kind of shell script will help the users.
> 
> Yeah, that would help. But if we have a choice to make it simpler in the
> beginning, why not then? :-)
> 
>>>
>>> And judging that we actually don't take too much effort to implement
>>> a virtio device emulation, I'd prefer it slightly. I guess something
>>> light weight and easier for use is more important here.
>>
>> This is very important point.
>> If so, we don't need much effort when virtio-spec is changed.
> 
> I'd assume so.
> 
>>> Actually, I have foreseen another benefit of adding virtio-user device
>>> emulation: we now might be able to add a rte_vhost_dequeue/enqueue_burst()
>>> unit test case. We simply can't do it before, since we depend on QEMU
>>> for testing, which is not acceptable for a unit test case. Making it
>>> be a unit test case would help us spotting any bad changes that would
>>> introduce bugs easily and automatically.
>>
>> As you mentioned above, QEMU process is not related with
>> dequeuing/enqueuing.
>> So I guess we may have a testing for rte_vhost_dequeue/enqueue_burst()
>> regardless of choice.
> 
> Yes, we don't need the dequeue/enqueue part, but we need the vhost-user
> initialization part from QEMU vhost-user. Now that we have vhost-user
> frontend from virtio-user, we have no dependency on QEMU any more.
> 
>>>> Anyway, we can choose one of belows.
>>>> 1. Take advantage of invoking less processes.
>>>> 2. Take advantage of maintainability of virtio-net device.
>>
>> If container usage that DPDK assumes is to invoke hundreds containers in
>> one host,
> 
> I barely know about container, but I would assume that's not rare.

Hi Yuanhan,

It's great to hear it's not so hard to maintain Jiangfeng's virtio-net
device features.

Please let me make sure how we can invoke many DPDK applications in
hundreds containers.
(Do we have a way to do? Or, will we have it in the future?)

Thanks,
Tetsuya

> 
>> we should take Jiangfeng's solution.
>>
>> Also, if implementing a new feature and maintaining Jiangfeng's
>> virtio-net device are not so hard,
> 
> As stated, I would assume so.




> 
> 	--yliu
> 
>> we should take his solution.
>>
>> I guess this is the point we need to consider.
>> What do you think?
>>
>> Thanks,
>> Tetsuya
>>
>>>>
>>>> Honestly, I'm OK if my solution is not merged.
>>>> Thus, it should be decided to let DPDK better.
>>>>
>>>> What do you think?
>>>> Which is better for DPDK?
>>>>
>>>> Thanks,
>>>> Tetsuya
>>>>
>>>>>
>>>>> 	--yliu
>>>>>

^ permalink raw reply	[flat|nested] 120+ messages in thread

* Re: [PATCH v5 0/6] Virtio-net PMD: QEMU QTest extension for container
  2016-06-06  9:30                           ` Tetsuya Mukawa
@ 2016-06-06  9:58                             ` Yuanhan Liu
  2016-06-06 10:50                             ` Tan, Jianfeng
  1 sibling, 0 replies; 120+ messages in thread
From: Yuanhan Liu @ 2016-06-06  9:58 UTC (permalink / raw)
  To: Tetsuya Mukawa
  Cc: dev, jianfeng.tan, huawei.xie, Thomas Monjalon, David Marchand,
	nakajima.yoshihiro

On Mon, Jun 06, 2016 at 06:30:00PM +0900, Tetsuya Mukawa wrote:
> On 2016/06/06 17:49, Yuanhan Liu wrote:
> > On Mon, Jun 06, 2016 at 05:33:31PM +0900, Tetsuya Mukawa wrote:
> >>>> [My solution]
> >>>> - Pros
> >>>> Basic principle of my implementation is not to reinvent the wheel.
> >>>
> >>> Yes, that's a good point. However, it's not that hard as we would have
> >>> thought in the first time: the tough part that dequeue/enqueue packets
> >>> from/to vring is actually offloaded to DPDK vhost-user. That means we
> >>> only need re-implement the control path of virtio-net device, plus the
> >>> vhost-user frontend. If you have a detailed look of your patchset as
> >>> well Jianfeng's, you might find that the two patchset are actually with
> >>> same code size. 
> >>
> >> Yes, I know this.
> >> So far, the amount of code is almost same, but in the future we may need
> >> to implement more, if virtio-net specification is revised.
> > 
> > It didn't take too much effort to implement from scratch, I doubt it
> > will for future revise. And, virtio-net spec is unlikely revised, or
> > to be precisely, unlikely revised quite often. Therefore, I don't see
> > big issues here.
> > 
> >>>> We can use a virtio-net device of QEMU implementation, it means we don't
> >>>> need to maintain virtio-net device by ourselves, and we can use all of
> >>>> functions supported by QEMU virtio-net device.
> >>>> - Cons
> >>>> Need to invoke QEMU process.
> >>>
> >>> Another thing is that it makes the usage a bit harder: look at the
> >>> long qemu cli options of your example usage. It also has some traps,
> >>> say, "--enable-kvm" is not allowed, which is a default option used
> >>> with QEMU.
> >>
> >> Probably a kind of shell script will help the users.
> > 
> > Yeah, that would help. But if we have a choice to make it simpler in the
> > beginning, why not then? :-)
> > 
> >>>
> >>> And judging that we actually don't take too much effort to implement
> >>> a virtio device emulation, I'd prefer it slightly. I guess something
> >>> light weight and easier for use is more important here.
> >>
> >> This is very important point.
> >> If so, we don't need much effort when virtio-spec is changed.
> > 
> > I'd assume so.
> > 
> >>> Actually, I have foreseen another benefit of adding virtio-user device
> >>> emulation: we now might be able to add a rte_vhost_dequeue/enqueue_burst()
> >>> unit test case. We simply can't do it before, since we depend on QEMU
> >>> for testing, which is not acceptable for a unit test case. Making it
> >>> be a unit test case would help us spotting any bad changes that would
> >>> introduce bugs easily and automatically.
> >>
> >> As you mentioned above, QEMU process is not related with
> >> dequeuing/enqueuing.
> >> So I guess we may have a testing for rte_vhost_dequeue/enqueue_burst()
> >> regardless of choice.
> > 
> > Yes, we don't need the dequeue/enqueue part, but we need the vhost-user
> > initialization part from QEMU vhost-user. Now that we have vhost-user
> > frontend from virtio-user, we have no dependency on QEMU any more.
> > 
> >>>> Anyway, we can choose one of belows.
> >>>> 1. Take advantage of invoking less processes.
> >>>> 2. Take advantage of maintainability of virtio-net device.
> >>
> >> If container usage that DPDK assumes is to invoke hundreds containers in
> >> one host,
> > 
> > I barely know about container, but I would assume that's not rare.
> 
> Hi Yuanhan,
> 
> It's great to hear it's not so hard to maintain Jiangfeng's virtio-net
> device features.
> 
> Please let me make sure how we can invoke many DPDK applications in
> hundreds containers.
> (Do we have a way to do? Or, will we have it in the future?)

One thing that I have thought of is that we should remove the huge page
dependency of current usage: huge page would be a very limited resource.

Note that I don't mean to remove support of huge page; DPDK supports
that by default and support it well after all. What I mean is to make
it work for the non-hugepage cases as well, so that it could fit for
the hundreds of containers case.

	--yliu

^ permalink raw reply	[flat|nested] 120+ messages in thread

* Re: [PATCH v5 0/6] Virtio-net PMD: QEMU QTest extension for container
  2016-06-06  9:28                       ` Tetsuya Mukawa
@ 2016-06-06 10:35                         ` Tan, Jianfeng
  0 siblings, 0 replies; 120+ messages in thread
From: Tan, Jianfeng @ 2016-06-06 10:35 UTC (permalink / raw)
  To: Tetsuya Mukawa, Yuanhan Liu
  Cc: dev, huawei.xie, Thomas Monjalon, David Marchand, nakajima.yoshihiro

Hi,

On 6/6/2016 5:28 PM, Tetsuya Mukawa wrote:
> On 2016/06/06 17:03, Tan, Jianfeng wrote:
>> Hi,
>>
>>
>> On 6/6/2016 1:10 PM, Tetsuya Mukawa wrote:
>>> Hi Yuanhan,
>>>
>>> Sorry for late replying.
>>>
>>> On 2016/06/03 13:17, Yuanhan Liu wrote:
>>>> On Thu, Jun 02, 2016 at 06:30:18PM +0900, Tetsuya Mukawa wrote:
>>>>> Hi Yuanhan,
>>>>>
>>>>> On 2016/06/02 16:31, Yuanhan Liu wrote:
>>>>>> But still, I'd ask do we really need 2 virtio for container solutions?
>>>>> I appreciate your comments.
>>>> No, I appreciate your effort for contributing to DPDK! vhost-pmd stuff
>>>> is just brilliant!
>>>>
>>>>> Let me have time to discuss it with our team.
>>>> I'm wondering could we have one solution only. IMO, the drawback of
>>>> having two (quite different) solutions might outweighs the benefit
>>>> it takes. Say, it might just confuse user.
>>> I agree with this.
>>> If we have 2 solutions, it would confuse the DPDK users.
>>>
>>>> OTOH, I'm wondering could you adapt to Jianfeng's solution? If not,
>>>> what's the missing parts, and could we fix it? I'm thinking having
>>>> one unified solution will keep ours energy/focus on one thing, making
>>>> it better and better! Having two just splits the energy; it also
>>>> introduces extra burden for maintaining.
>>> Of course, I adopt Jiangeng's solution basically.
>>> Actually, his solution is almost similar I tried to implement at first.
>>>
>>> I guess here is pros/cons of 2 solutions.
>>>
>>> [Jianfeng's solution]
>>> - Pros
>>> Don't need to invoke QEMU process.
>>> - Cons
>>> If virtio-net specification is changed, we need to implement it by
>>> ourselves.
>> It will barely introduce any change when virtio-net specification is
>> changed as far as I can see. The only part we care is the how desc,
>> avail, used distribute on memory, which is a very small part.
> It's a good news, because we don't pay much effort to follow latest
> virtio-net specification.
>
>> It's true that my solution now seriously depend on vhost-user protocol,
>> which is defined in QEMU. I cannot see a big problem there so far.
>>
>>>    Also, LSC interrupt and control queue functions are not
>>> supported yet.
>>> I agree both functions may not be so important, and if we need it
>>> we can implement them, but we need to pay energy to implement them.
>> LSC is really less important than rxq interrupt (IMO). We don't know how
>> long will rxq interrupt of virtio be available for QEMU, but we can
>> accelerate it if we avoid using QEMU.
>>
>> Actually, if the vhost backend is vhost-user (the main use case),
>> current qemu have limited control queue support, because it needs the
>> support from the vhost user backend.
>>
>> Add one more con of my solution:
>> - Need to write another logic to support other virtio device (say
>> virtio-scsi), if it's easier of Tetsuya's solution to do that?
>>
> Probably, my solution will be easier to do that.
> My solution has enough facility to access to io port and PCI
> configuration space of virtio-scsi device of QEMU.
> So, if you invoke with QEMU with virtio-scsi, only you need to do is
> changing PCI interface of current virtio-scsi PMD.
> (I just assume currently we have virtio-scsi PMD.)
> If the virtio-scsi PMD works on QEMU, same code should work with only
> changing PCI interface.
>
>>> [My solution]
>>> - Pros
>>> Basic principle of my implementation is not to reinvent the wheel.
>>> We can use a virtio-net device of QEMU implementation, it means we don't
>>> need to maintain virtio-net device by ourselves, and we can use all of
>>> functions supported by QEMU virtio-net device.
>>> - Cons
>>> Need to invoke QEMU process.
>> Two more possible cons:
>> a) This solution also needs to maintain qtest utility, right?
> But the spec of qtest will be more stable than virtio-net.
>
>> b) There's still address arrange restriction, right? Although we can use
>> "--base-virtaddr=0x400000000" to relieve this question, but how about if
>> there are 2 or more devices? (By the way, is there still address arrange
>> requirement for 32 bit system)
> Our solutions are a virtio-net driver, and a vhost-user backend driver
> needs to access to memory allocated by virtio-net driver.
> If an application has 2 devices, it means 2 vhost-user backend PMD needs
> to access to the same application memory, right?
> Also, currently each virtio-net device has an one QEMU process.
> So, I am not sure what will be problem if we have 2 devices.

OK, my bad. Multiple devices should have just one 
"--base-virtaddr=0x400000000".

>
> BTW, 44bits limitations comes from current QEMU implementation itself.
> (Actually, if modern virtio device is used, we should be able to remove
> the restriction.)

Good to know.

>
>> c) Actually, IMO this solution is sensitive to any virtio spec change
>> (io port, pci configuration space).
> In this case, virtio-net PMD itself will need to be fixed.
> Then, my implementation will be also fixed with the same way.
> Current implementation has only PCI abstraction that Yuanhan introduced,
> so you may think my solution depends on above things, but actually, my
> implementation depends on only how to access to io port and PCI
> configuration space. This is what "qtest.h" provides.

Gotcha.

Thanks,
Jianfeng

^ permalink raw reply	[flat|nested] 120+ messages in thread

* Re: [PATCH v5 0/6] Virtio-net PMD: QEMU QTest extension for container
  2016-06-06  9:30                           ` Tetsuya Mukawa
  2016-06-06  9:58                             ` Yuanhan Liu
@ 2016-06-06 10:50                             ` Tan, Jianfeng
  2016-06-07  7:12                               ` Tetsuya Mukawa
  1 sibling, 1 reply; 120+ messages in thread
From: Tan, Jianfeng @ 2016-06-06 10:50 UTC (permalink / raw)
  To: Tetsuya Mukawa, Yuanhan Liu
  Cc: dev, huawei.xie, Thomas Monjalon, David Marchand, nakajima.yoshihiro

Hi,


On 6/6/2016 5:30 PM, Tetsuya Mukawa wrote:
> On 2016/06/06 17:49, Yuanhan Liu wrote:
>> On Mon, Jun 06, 2016 at 05:33:31PM +0900, Tetsuya Mukawa wrote:
>>>>> [My solution]
>>>>> - Pros
>>>>> Basic principle of my implementation is not to reinvent the wheel.
>>>> Yes, that's a good point. However, it's not that hard as we would have
>>>> thought in the first time: the tough part that dequeue/enqueue packets
>>>> from/to vring is actually offloaded to DPDK vhost-user. That means we
>>>> only need re-implement the control path of virtio-net device, plus the
>>>> vhost-user frontend. If you have a detailed look of your patchset as
>>>> well Jianfeng's, you might find that the two patchset are actually with
>>>> same code size.
>>> Yes, I know this.
>>> So far, the amount of code is almost same, but in the future we may need
>>> to implement more, if virtio-net specification is revised.
>> It didn't take too much effort to implement from scratch, I doubt it
>> will for future revise. And, virtio-net spec is unlikely revised, or
>> to be precisely, unlikely revised quite often. Therefore, I don't see
>> big issues here.
>>
>>>>> We can use a virtio-net device of QEMU implementation, it means we don't
>>>>> need to maintain virtio-net device by ourselves, and we can use all of
>>>>> functions supported by QEMU virtio-net device.
>>>>> - Cons
>>>>> Need to invoke QEMU process.
>>>> Another thing is that it makes the usage a bit harder: look at the
>>>> long qemu cli options of your example usage. It also has some traps,
>>>> say, "--enable-kvm" is not allowed, which is a default option used
>>>> with QEMU.
>>> Probably a kind of shell script will help the users.
>> Yeah, that would help. But if we have a choice to make it simpler in the
>> beginning, why not then? :-)
>>
>>>> And judging that we actually don't take too much effort to implement
>>>> a virtio device emulation, I'd prefer it slightly. I guess something
>>>> light weight and easier for use is more important here.
>>> This is very important point.
>>> If so, we don't need much effort when virtio-spec is changed.
>> I'd assume so.
>>
>>>> Actually, I have foreseen another benefit of adding virtio-user device
>>>> emulation: we now might be able to add a rte_vhost_dequeue/enqueue_burst()
>>>> unit test case. We simply can't do it before, since we depend on QEMU
>>>> for testing, which is not acceptable for a unit test case. Making it
>>>> be a unit test case would help us spotting any bad changes that would
>>>> introduce bugs easily and automatically.
>>> As you mentioned above, QEMU process is not related with
>>> dequeuing/enqueuing.
>>> So I guess we may have a testing for rte_vhost_dequeue/enqueue_burst()
>>> regardless of choice.
>> Yes, we don't need the dequeue/enqueue part, but we need the vhost-user
>> initialization part from QEMU vhost-user. Now that we have vhost-user
>> frontend from virtio-user, we have no dependency on QEMU any more.
>>
>>>>> Anyway, we can choose one of belows.
>>>>> 1. Take advantage of invoking less processes.
>>>>> 2. Take advantage of maintainability of virtio-net device.
>>> If container usage that DPDK assumes is to invoke hundreds containers in
>>> one host,
>> I barely know about container, but I would assume that's not rare.
> Hi Yuanhan,
>
> It's great to hear it's not so hard to maintain Jiangfeng's virtio-net
> device features.
>
> Please let me make sure how we can invoke many DPDK applications in
> hundreds containers.
> (Do we have a way to do? Or, will we have it in the future?)

Just to add some option here, we cannot say no to that kind of use case. 
To have many instances, we can:

(1) add a restriction of "cpu share" on each instance, relying on kernel 
to schedule.
(2) enable interrupt mode, so that one instance can go to sleep when it 
has no pkts to receive and awoke by vhost backend when pkts come.

Option 2 is my choice.

Thanks,
Jianfeng

>
> Thanks,
> Tetsuya

^ permalink raw reply	[flat|nested] 120+ messages in thread

* Re: [PATCH v5 0/6] Virtio-net PMD: QEMU QTest extension for container
  2016-06-06 10:50                             ` Tan, Jianfeng
@ 2016-06-07  7:12                               ` Tetsuya Mukawa
  2016-06-07  7:33                                 ` Yuanhan Liu
  0 siblings, 1 reply; 120+ messages in thread
From: Tetsuya Mukawa @ 2016-06-07  7:12 UTC (permalink / raw)
  To: Tan, Jianfeng, Yuanhan Liu
  Cc: dev, huawei.xie, Thomas Monjalon, David Marchand, nakajima.yoshihiro

On 2016/06/06 19:50, Tan, Jianfeng wrote:
>> Please let me make sure how we can invoke many DPDK applications in
>> hundreds containers.
>> (Do we have a way to do? Or, will we have it in the future?)
> 
> Just to add some option here, we cannot say no to that kind of use case.
> To have many instances, we can:
> 
> (1) add a restriction of "cpu share" on each instance, relying on kernel
> to schedule.
> (2) enable interrupt mode, so that one instance can go to sleep when it
> has no pkts to receive and awoke by vhost backend when pkts come.
> 
> Option 2 is my choice.

Hi Yuanhan and Jianfeng,

Thanks for your descriptions about how you will invoke many DPDK
applications in containers.
I guess we have almost talked everything we need to consider to choose
one of container implementations.

We may have one conclusion about this choice.
If we can easily maintain virtio device implementation, also if we have
an use-case to invoke hundreds of DPDK applications in containers, I
guess Jianfeng's implementation will be nice.
Anyway, we just follow virtio maintainers choice.

Thanks,
Tetsuya

> 
> Thanks,
> Jianfeng
> 
>>
>> Thanks,
>> Tetsuya
> 
> 

^ permalink raw reply	[flat|nested] 120+ messages in thread

* Re: [PATCH v5 0/6] Virtio-net PMD: QEMU QTest extension for container
  2016-06-07  7:12                               ` Tetsuya Mukawa
@ 2016-06-07  7:33                                 ` Yuanhan Liu
  0 siblings, 0 replies; 120+ messages in thread
From: Yuanhan Liu @ 2016-06-07  7:33 UTC (permalink / raw)
  To: Tetsuya Mukawa
  Cc: Tan, Jianfeng, dev, huawei.xie, Thomas Monjalon, David Marchand,
	nakajima.yoshihiro

On Tue, Jun 07, 2016 at 04:12:28PM +0900, Tetsuya Mukawa wrote:
> On 2016/06/06 19:50, Tan, Jianfeng wrote:
> >> Please let me make sure how we can invoke many DPDK applications in
> >> hundreds containers.
> >> (Do we have a way to do? Or, will we have it in the future?)
> > 
> > Just to add some option here, we cannot say no to that kind of use case.
> > To have many instances, we can:
> > 
> > (1) add a restriction of "cpu share" on each instance, relying on kernel
> > to schedule.
> > (2) enable interrupt mode, so that one instance can go to sleep when it
> > has no pkts to receive and awoke by vhost backend when pkts come.
> > 
> > Option 2 is my choice.
> 
> Hi Yuanhan and Jianfeng,
> 
> Thanks for your descriptions about how you will invoke many DPDK
> applications in containers.
> I guess we have almost talked everything we need to consider to choose
> one of container implementations.
> 
> We may have one conclusion about this choice.
> If we can easily maintain virtio device implementation,

AFAIK, yes.

> also if we have
> an use-case to invoke hundreds of DPDK applications in containers, I

Don't know yet, but it seems easier to achieve that with Jianfeng's
solution.

> guess Jianfeng's implementation will be nice.

I'm afraid that's what I'm seeing.

> Anyway, we just follow virtio maintainers choice.

Thanks, and of course, contribution is huge welcome so that we could
have a better container solution!

	--yliu

^ permalink raw reply	[flat|nested] 120+ messages in thread

end of thread, other threads:[~2016-06-07  7:32 UTC | newest]

Thread overview: 120+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-01-18  9:13 [PATCH 0/3] virtio: Add a new layer to abstract pci access method Tetsuya Mukawa
2016-01-18  9:13 ` [PATCH 1/3] virtio: Change the parameter order of io_write8/16/32() Tetsuya Mukawa
2016-01-21 11:07   ` [RFC PATCH 0/5] virtio: Add a new layer to abstract pci access method Tetsuya Mukawa
2016-01-21 11:10     ` Tetsuya Mukawa
2016-01-21 11:07   ` [RFC PATCH 1/5] virtio: Change the parameter order of io_write8/16/32() Tetsuya Mukawa
2016-01-21 11:07   ` [RFC PATCH 2/5] virtio: move rte_eal_pci_unmap_device() to virtio_pci.c Tetsuya Mukawa
2016-01-21 11:07   ` [RFC PATCH 3/5] virtio: Add a new layer to abstract pci access method Tetsuya Mukawa
2016-01-22  7:26     ` Xie, Huawei
2016-01-22  7:35       ` Tetsuya Mukawa
2016-01-21 11:07   ` [RFC PATCH 4/5] EAL: Add new EAL "--shm" option Tetsuya Mukawa
2016-01-22  1:43     ` Tan, Jianfeng
2016-01-22  2:07       ` Tan, Jianfeng
2016-01-22  3:23         ` Tetsuya Mukawa
2016-01-21 11:07   ` [RFC PATCH 5/5] virtio: Extend virtio-net PMD to support container environment Tetsuya Mukawa
2016-01-22  8:14     ` Xie, Huawei
2016-01-22 10:37       ` Tetsuya Mukawa
2016-01-25 10:15         ` Xie, Huawei
2016-01-26  2:58           ` Tetsuya Mukawa
2016-01-27  9:39             ` Xie, Huawei
2016-01-28  2:33               ` Tetsuya Mukawa
2016-01-25 10:17     ` Xie, Huawei
2016-01-26  2:58       ` Tetsuya Mukawa
2016-01-25 10:29     ` Xie, Huawei
2016-01-26  2:58       ` Tetsuya Mukawa
2016-01-27 10:03     ` Xie, Huawei
2016-01-28  2:44       ` Tetsuya Mukawa
2016-01-29  8:56         ` Xie, Huawei
2016-01-27 15:58     ` Xie, Huawei
2016-01-28  2:47       ` Tetsuya Mukawa
2016-01-28  9:48         ` Xie, Huawei
2016-01-28  9:53           ` Tetsuya Mukawa
2016-01-27 16:45     ` Xie, Huawei
2016-01-28  2:47       ` Tetsuya Mukawa
2016-01-28  6:15         ` Xie, Huawei
2016-01-28  6:29           ` Tetsuya Mukawa
2016-01-29  8:57     ` Yuanhan Liu
2016-01-29  9:13       ` Yuanhan Liu
2016-02-01  1:49         ` Tetsuya Mukawa
2016-02-10  3:40     ` [PATCH v2 0/5] Virtio-net PMD: QEMU QTest extension for container Tetsuya Mukawa
2016-02-10  3:40     ` [PATCH v2 1/5] virtio: Retrieve driver name from eth_dev Tetsuya Mukawa
2016-02-10  3:40     ` [PATCH v2 2/5] EAL: Add new EAL "--qtest-virtio" option Tetsuya Mukawa
2016-02-15  7:52       ` Tan, Jianfeng
2016-02-16  1:32         ` Tetsuya Mukawa
2016-02-16  5:53       ` David Marchand
2016-02-16 11:36         ` Tan, Jianfeng
2016-02-17  3:36           ` Tetsuya Mukawa
2016-02-22  8:17       ` [PATCH v3 0/6] Virtio-net PMD: QEMU QTest extension for container Tetsuya Mukawa
2016-02-22  8:17       ` [PATCH v3 1/6] virtio: Retrieve driver name from eth_dev Tetsuya Mukawa
2016-02-22  8:17       ` [PATCH v3 2/6] vhost: Add a function to check virtio device type Tetsuya Mukawa
2016-02-22  8:17       ` [PATCH v3 3/6] EAL: Add new EAL "--range-virtaddr" option Tetsuya Mukawa
2016-03-04  2:20         ` Tan, Jianfeng
2016-03-09  8:33         ` [PATCH v4 00/12] Virtio-net PMD: QEMU QTest extension for container Tetsuya Mukawa
2016-03-09  8:33         ` [PATCH v4 01/12] virtio: Retrieve driver name from eth_dev Tetsuya Mukawa
2016-03-09  8:33         ` [PATCH v4 02/12] vhost: Add a function to check virtio device type Tetsuya Mukawa
2016-03-09  8:33         ` [PATCH v4 03/12] EAL: Add a new "--range-virtaddr" option Tetsuya Mukawa
2016-03-09  8:33         ` [PATCH v4 04/12] EAL: Add a new "--align-memsize" option Tetsuya Mukawa
2016-03-09  8:33         ` [PATCH v4 05/12] virtio, qtest: Add QTest utility basic functions Tetsuya Mukawa
2016-03-09  8:33         ` [PATCH v4 06/12] virtio, qtest: Add pci device initialization function to qtest utils Tetsuya Mukawa
2016-03-09  8:33         ` [PATCH v4 07/12] virtio, qtest: Add functionality to share memory between QTest guest Tetsuya Mukawa
2016-03-09  8:33         ` [PATCH v4 08/12] virtio, qtest: Add functionality to handle interrupt Tetsuya Mukawa
2016-03-09  8:33         ` [PATCH v4 09/12] virtio, qtest: Add misc functions to handle pci information Tetsuya Mukawa
2016-03-09  8:33         ` [PATCH v4 10/12] virtio: Add QTest support to vtpci abstraction Tetsuya Mukawa
2016-03-09  8:33         ` [PATCH v4 11/12] virtio: Add QTest support for virtio-net PMD Tetsuya Mukawa
2016-06-02  3:29           ` [PATCH v5 0/6] Virtio-net PMD: QEMU QTest extension for container Tetsuya Mukawa
2016-06-02  7:31             ` Yuanhan Liu
2016-06-02  9:30               ` Tetsuya Mukawa
2016-06-03  4:17                 ` Yuanhan Liu
2016-06-03 13:51                   ` Thomas Monjalon
2016-06-06  5:10                   ` Tetsuya Mukawa
2016-06-06  7:21                     ` Yuanhan Liu
2016-06-06  8:33                       ` Tetsuya Mukawa
2016-06-06  8:49                         ` Yuanhan Liu
2016-06-06  9:30                           ` Tetsuya Mukawa
2016-06-06  9:58                             ` Yuanhan Liu
2016-06-06 10:50                             ` Tan, Jianfeng
2016-06-07  7:12                               ` Tetsuya Mukawa
2016-06-07  7:33                                 ` Yuanhan Liu
2016-06-06  8:03                     ` Tan, Jianfeng
2016-06-06  9:28                       ` Tetsuya Mukawa
2016-06-06 10:35                         ` Tan, Jianfeng
2016-06-02  3:29           ` [PATCH v5 1/6] virtio, qtest: Add QTest utility basic functions Tetsuya Mukawa
2016-06-02  3:29           ` [PATCH v5 2/6] virtio, qtest: Add pci device initialization function to qtest utils Tetsuya Mukawa
2016-06-02  3:29           ` [PATCH v5 3/6] virtio, qtest: Add functionality to share memory between QTest guest Tetsuya Mukawa
2016-06-02  3:29           ` [PATCH v5 4/6] virtio, qtest: Add misc functions to handle pci information Tetsuya Mukawa
2016-06-02  3:29           ` [PATCH v5 5/6] virtio: Add QTest support to vtpci abstraction Tetsuya Mukawa
2016-06-02  3:29           ` [PATCH v5 6/6] virtio: Add QTest support for virtio-net PMD Tetsuya Mukawa
2016-06-02  3:30           ` [PATCH v1 0/2] Supplement patches for virtio-qtest to support LSC interrupt Tetsuya Mukawa
2016-06-02  3:30           ` [PATCH v1 1/2] virtio: Handle interrupt things under vtpci abstraction Tetsuya Mukawa
2016-06-02  3:30           ` [PATCH v1 2/2] virtio, qtest: Add functionality to handle interrupt Tetsuya Mukawa
2016-03-09  8:33         ` [PATCH v4 12/12] docs: add release note for qtest virtio container support Tetsuya Mukawa
2016-02-22  8:17       ` [PATCH v3 4/6] EAL: Add a new "--align-memsize" option Tetsuya Mukawa
2016-02-22  8:17       ` [PATCH v3 5/6] virtio: Add support for qtest virtio-net PMD Tetsuya Mukawa
2016-03-04  2:18         ` Tan, Jianfeng
2016-03-04  5:05           ` Tetsuya Mukawa
2016-03-04  6:10             ` Tan, Jianfeng
2016-03-04  9:53               ` Tetsuya Mukawa
2016-02-22  8:17       ` [PATCH v3 6/6] docs: add release note for qtest virtio container support Tetsuya Mukawa
2016-02-22 15:40         ` Mcnamara, John
2016-02-23 10:28           ` Mcnamara, John
2016-02-24  1:20             ` Tetsuya Mukawa
2016-02-10  3:40     ` [PATCH v2 3/5] vhost: Add a function to check virtio device type Tetsuya Mukawa
2016-02-10  3:40     ` [PATCH v2 4/5] virtio: Add support for qtest virtio-net PMD Tetsuya Mukawa
2016-02-10  3:40     ` [PATCH v2 5/5] docs: add release note for qtest virtio container support Tetsuya Mukawa
2016-01-28  9:33   ` [PATCH v2 0/3] virtio: Add a new layer to abstract pci access method Tetsuya Mukawa
2016-01-28  9:33   ` [PATCH v2 1/3] virtio: Change the parameter order of io_write8/16/32() Tetsuya Mukawa
2016-01-28  9:33   ` [PATCH v2 2/3] virtio: move rte_eal_pci_unmap_device() to virtio_pci.c Tetsuya Mukawa
2016-01-28  9:33   ` [PATCH v2 3/3] virtio: Add a new layer to abstract pci access method Tetsuya Mukawa
2016-01-29  9:17     ` Yuanhan Liu
2016-02-01  1:50       ` Tetsuya Mukawa
2016-02-01 13:15         ` Yuanhan Liu
2016-02-02  2:19           ` Tetsuya Mukawa
2016-02-02  2:45             ` Yuanhan Liu
2016-02-02  3:55               ` Tetsuya Mukawa
2016-01-18  9:13 ` [PATCH 2/3] virtio: move rte_eal_pci_unmap_device() to virtio_pci.c Tetsuya Mukawa
2016-01-18  9:13 ` [PATCH 3/3] virtio: Add a new layer to abstract pci access method Tetsuya Mukawa
2016-01-18 13:46   ` Yuanhan Liu
2016-01-19  1:22     ` Tetsuya Mukawa
2016-01-19  2:41     ` Xie, Huawei
2016-01-18 13:13 ` [PATCH 0/3] " Tan, Jianfeng
2016-01-19  1:22   ` Tetsuya Mukawa

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.