All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/3] drivers/net:new PMD using tun/tap host interface
@ 2016-09-15 14:10 Keith Wiles
  2016-09-15 14:10 ` [PATCH 2/3] docs:tun/tap PMD information Keith Wiles
                   ` (2 more replies)
  0 siblings, 3 replies; 59+ messages in thread
From: Keith Wiles @ 2016-09-15 14:10 UTC (permalink / raw)
  To: dev

The rte_eth_tap.c PMD creates a device using TUN/TAP interfaces
on the local host. The PMD allows for DPDK and the host to
communicate using a raw device interface on the host and in
the DPDK application. The device created is a Tap device with
a L2 packet header.

Signed-off-by: Keith Wiles <keith.wiles@intel.com>
---
 drivers/net/tap/Makefile                |  60 +++
 drivers/net/tap/rte_eth_tap.c           | 872 ++++++++++++++++++++++++++++++++
 drivers/net/tap/rte_pmd_tap_version.map |   4 +
 3 files changed, 936 insertions(+)
 create mode 100644 drivers/net/tap/Makefile
 create mode 100644 drivers/net/tap/rte_eth_tap.c
 create mode 100644 drivers/net/tap/rte_pmd_tap_version.map

diff --git a/drivers/net/tap/Makefile b/drivers/net/tap/Makefile
new file mode 100644
index 0000000..442a2fe
--- /dev/null
+++ b/drivers/net/tap/Makefile
@@ -0,0 +1,60 @@
+#   BSD LICENSE
+#
+#   Copyright(c) 2014 John W. Linville <linville@redhat.com>
+#   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+#   Copyright(c) 2014 6WIND S.A.
+#   All rights reserved.
+#
+#   Redistribution and use in source and binary forms, with or without
+#   modification, are permitted provided that the following conditions
+#   are met:
+#
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice, this list of conditions and the following disclaimer in
+#       the documentation and/or other materials provided with the
+#       distribution.
+#     * Neither the name of Intel Corporation nor the names of its
+#       contributors may be used to endorse or promote products derived
+#       from this software without specific prior written permission.
+#
+#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+#   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+#   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+#   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+#   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+#
+# library name
+#
+LIB = librte_pmd_tap.a
+
+EXPORT_MAP := rte_pmd_tap_version.map
+
+LIBABIVER := 1
+
+CFLAGS += -O3
+CFLAGS += $(WERROR_FLAGS)
+
+#
+# all source are stored in SRCS-y
+#
+SRCS-$(CONFIG_RTE_LIBRTE_PMD_TAP) += rte_eth_tap.c
+
+# this lib depends upon:
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_TAP) += lib/librte_eal
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_TAP) += lib/librte_mbuf
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_TAP) += lib/librte_mempool
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_TAP) += lib/librte_ether
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_TAP) += lib/librte_kvargs
+
+include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/drivers/net/tap/rte_eth_tap.c b/drivers/net/tap/rte_eth_tap.c
new file mode 100644
index 0000000..027bb35
--- /dev/null
+++ b/drivers/net/tap/rte_eth_tap.c
@@ -0,0 +1,872 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2016 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <rte_mbuf.h>
+#include <rte_ethdev.h>
+#include <rte_malloc.h>
+#include <rte_kvargs.h>
+#include <rte_dev.h>
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/socket.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <unistd.h>
+#include <poll.h>
+#include <arpa/inet.h>
+#include <net/if.h>
+#ifdef __linux__
+#include <linux/if.h>
+#include <linux/if_tun.h>
+#include <linux/if_ether.h>
+#else
+#include <netinet/if_ether.h>
+#endif
+#include <fcntl.h>
+
+#include <poll.h>
+
+/* Linux based path to the TUN device */
+#define TUN_TAP_DEV_PATH        "/dev/net/tun"
+
+#define ETH_TAP_IFACE_ARG       "iface"
+#define ETH_TAP_SPEED_ARG       "speed"
+
+/* NULL-terminated list of the kvargs keys handed to rte_kvargs_parse() */
+static const char *valid_arguments[] = {
+	ETH_TAP_IFACE_ARG,
+	ETH_TAP_SPEED_ARG,
+	NULL
+};
+
+/* Driver name reported through dev_info and rte_eth_dev_data::drv_name */
+static const char *drivername = "Tap PMD";
+/* Counter used to build default interface names; bumped on every devinit */
+static int tap_unit = 0;
+
+/*
+ * Link template copied into the dev_link of each new port. link_speed is
+ * overwritten from the requested speed before the copy is made.
+ */
+static struct rte_eth_link pmd_link = {
+	.link_speed = ETH_SPEED_NUM_10G,
+	.link_duplex = ETH_LINK_FULL_DUPLEX,
+	.link_status = ETH_LINK_DOWN,
+	.link_autoneg = ETH_LINK_SPEED_AUTONEG
+};
+
+/* Creation parameters collected by pmd_tap_devinit() from the vdev args */
+struct tap_info {
+	char name[RTE_ETH_NAME_MAX_LEN]; /* Interface name supplied/given */
+	int speed;			 /* Speed of interface */
+};
+
+/* Software counters kept per RX queue and per TX queue */
+struct pkt_stats {
+	uint64_t opackets;		/* Number of output packets */
+	uint64_t ipackets;		/* Number of input packets */
+	uint64_t obytes;		/* Number of bytes on output */
+	uint64_t ibytes;		/* Number of bytes on input */
+	uint64_t errs;			/* Number of error packets */
+};
+
+/* State of one receive queue; handed to pmd_rx_burst() as its queue arg */
+struct rx_queue {
+	struct rte_mempool *mp;		/* Mempool for RX packets */
+	uint16_t in_port;		/* Port ID */
+	int fd;				/* TUN/TAP descriptor to read, -1 if unset */
+
+	struct pkt_stats stats;		/* Stats for this RX queue */
+};
+
+/* State of one transmit queue; handed to pmd_tx_burst() as its queue arg */
+struct tx_queue {
+	int fd;				/* TUN/TAP descriptor to write, -1 if unset */
+	struct pkt_stats stats;		/* Stats for this TX queue */
+};
+
+/* Per-port private data, reachable through rte_eth_dev_data::dev_private */
+struct pmd_internals {
+	char name[RTE_ETH_NAME_MAX_LEN];	/* Internal Tap device name */
+	uint16_t nb_queues;			/* Number of queues supported */
+	uint16_t pad0;
+	struct ether_addr eth_addr;	/* Mac address of the device port */
+
+	int if_index;			/* IF_INDEX for the port */
+	/* Entries are -1 until a queue is set up on that index */
+	int fds[RTE_PMD_TAP_MAX_QUEUES]; /* List of all file descriptors */
+
+	struct rx_queue rxq[RTE_PMD_TAP_MAX_QUEUES];	/* List of RX queues */
+	struct tx_queue txq[RTE_PMD_TAP_MAX_QUEUES];	/* List of TX queues */
+};
+
+/*
+ * Tun/Tap allocation routine
+ *
+ * name is the name of the interface to use; pass NULL or an empty string to
+ * take the host supplied name. When name is not NULL, the name the kernel
+ * actually assigned is copied back into it, so the buffer must be at least
+ * IFNAMSIZ bytes long.
+ *
+ * Returns the non-blocking TUN/TAP file descriptor, or -1 on failure.
+ */
+static int
+tun_alloc(char *name)
+{
+	struct ifreq ifr;
+	unsigned int features;
+	int fd;
+
+	memset(&ifr, 0, sizeof(struct ifreq));
+
+	ifr.ifr_flags = IFF_TAP | IFF_NO_PI;
+	/* snprintf() always terminates; strncpy() does not on truncation */
+	if (name && name[0])
+		snprintf(ifr.ifr_name, IFNAMSIZ, "%s", name);
+
+	fd = open(TUN_TAP_DEV_PATH, O_RDWR);
+	if (fd < 0) {
+		RTE_LOG(ERR, PMD, "Unable to create TAP interface\n");
+		goto error;
+	}
+
+	/* Grab the TUN features to verify we can work */
+	if (ioctl(fd, TUNGETFEATURES, &features) < 0) {
+		RTE_LOG(ERR, PMD, "Unable to get TUN/TAP features\n");
+		goto error;
+	}
+	RTE_LOG(DEBUG, PMD, "TUN/TAP Features %08x\n", features);
+
+	if (!(features & IFF_MULTI_QUEUE) && (RTE_PMD_TAP_MAX_QUEUES > 1)) {
+		/* This aborts the allocation, so report it as an error */
+		RTE_LOG(ERR, PMD, "TUN/TAP device only one queue\n");
+		goto error;
+	} else if ((features & IFF_ONE_QUEUE) && (RTE_PMD_TAP_MAX_QUEUES == 1)) {
+		ifr.ifr_flags |= IFF_ONE_QUEUE;
+		RTE_LOG(DEBUG, PMD, "Single queue only support\n");
+	} else {
+		ifr.ifr_flags |= IFF_MULTI_QUEUE;
+		RTE_LOG(DEBUG, PMD, "Multi-queue support for %d queues\n",
+			RTE_PMD_TAP_MAX_QUEUES);
+	}
+
+	/* Set the TUN/TAP configuration and get the name if needed */
+	if (ioctl(fd, TUNSETIFF, (void *)&ifr) < 0) {
+		RTE_LOG(ERR, PMD, "Unable to set TUNSETIFF for %s\n", ifr.ifr_name);
+		perror("TUNSETIFF");
+		goto error;
+	}
+
+	/* Always set the file descriptor to non-blocking */
+	if (fcntl(fd, F_SETFL, O_NONBLOCK) < 0) {
+		RTE_LOG(ERR, PMD, "Unable to set to nonblocking\n");
+		perror("F_SETFL, NONBLOCK");
+		goto error;
+	}
+
+	/* If the kernel picked a different name, hand it back to the caller */
+	if (name && strcmp(name, ifr.ifr_name))
+		strcpy(name, ifr.ifr_name);
+
+	return fd;
+
+error:
+	/* 0 is a valid descriptor; only negative values mean "not open" */
+	if (fd >= 0)
+		close(fd);
+	return -1;
+}
+
+/*
+ * RX burst callback: pull up to nb_pkts frames from the queue's TUN/TAP file
+ * descriptor, one mbuf per frame, and return how many were stored in bufs.
+ * The descriptor is polled with a zero timeout, so the call never blocks.
+ */
+static uint16_t
+pmd_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
+{
+	struct rx_queue *rxq = queue;
+	struct pollfd pfd = { .fd = rxq->fd, .events = POLLIN };
+	unsigned long rx_bytes = 0;
+	uint16_t count = 0;
+
+	while (count < nb_pkts) {
+		struct rte_mbuf *pkt;
+		int nread;
+
+		/* nothing pending (or poll failed): stop the burst */
+		if (poll(&pfd, 1, 0) <= 0)
+			break;
+
+		if (pfd.revents == 0)
+			continue;
+
+		if (pfd.revents & POLLERR) {
+			rxq->stats.errs++;
+			RTE_LOG(ERR, PMD, "Packet Error\n");
+			break;
+		}
+		if (pfd.revents & POLLHUP)
+			RTE_LOG(ERR, PMD, "Peer closed connection\n");
+
+		/* one fresh mbuf per received frame */
+		pkt = rte_pktmbuf_alloc(rxq->mp);
+		if (unlikely(pkt == NULL)) {
+			RTE_LOG(ERR, PMD, "Unable to allocate mbuf\n");
+			break;
+		}
+
+		nread = read(pfd.fd, rte_pktmbuf_mtod(pkt, char *),
+			     rte_pktmbuf_tailroom(pkt));
+		if (nread <= 0) {
+			RTE_LOG(ERR, PMD, "len %d\n", nread);
+			rte_pktmbuf_free(pkt);
+			break;
+		}
+
+		pkt->port = rxq->in_port;
+		pkt->pkt_len = nread;
+		pkt->data_len = nread;
+
+		/* hand the frame to the caller and account for it */
+		rx_bytes += pkt->pkt_len;
+		bufs[count] = pkt;
+		count++;
+	}
+
+	rxq->stats.ibytes += rx_bytes;
+	rxq->stats.ipackets += count;
+
+	return count;
+}
+
+/*
+ * TX burst callback: write up to nb_pkts frames to the queue's TUN/TAP file
+ * descriptor. Each mbuf that was written is freed; the return value is the
+ * number of frames sent. Frames not sent are counted in the queue's errs.
+ */
+static uint16_t
+pmd_tx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
+{
+	struct tx_queue *txq = queue;
+	struct pollfd pfd;
+	unsigned long sent_bytes = 0;
+	uint16_t sent = 0;
+	int attempt;
+
+	if (unlikely(nb_pkts == 0))
+		return 0;
+
+	pfd.fd = txq->fd;
+	pfd.events = POLLOUT;
+
+	for (attempt = 0; attempt < nb_pkts; attempt++) {
+		struct rte_mbuf *pkt;
+		int written;
+
+		/* zero timeout: give up as soon as the fd is not ready */
+		if (poll(&pfd, 1, 0) <= 0)
+			break;
+
+		if (!(pfd.revents & POLLOUT))
+			continue;
+
+		/* push the next unsent frame to the host */
+		pkt = bufs[sent];
+		written = write(pfd.fd, rte_pktmbuf_mtod(pkt, void*),
+				rte_pktmbuf_pkt_len(pkt));
+		if (written <= 0)
+			break;
+
+		sent_bytes += pkt->pkt_len;
+		sent++;
+		rte_pktmbuf_free(pkt);
+	}
+
+	txq->stats.obytes += sent_bytes;
+	txq->stats.errs += nb_pkts - sent;
+	txq->stats.opackets += sent;
+
+	return sent;
+}
+
+/* dev_start callback: only marks the link as up; always returns 0. */
+static int
+tap_dev_start(struct rte_eth_dev *dev)
+{
+	struct rte_eth_link *link = &dev->data->dev_link;
+
+	/* Force the Link up */
+	link->link_status = ETH_LINK_UP;
+	return 0;
+}
+
+/*
+ * This function gets called when the current port gets stopped.
+ *
+ * Closes every descriptor recorded in fds[] and marks the link down. Each
+ * closed descriptor is reset to -1 in fds[] and in the matching RX/TX queue,
+ * so the queue release callbacks and pmd_tap_devuninit() do not close the
+ * same descriptor number a second time.
+ */
+static void
+tap_dev_stop(struct rte_eth_dev *dev)
+{
+	int i;
+	struct pmd_internals *internals = dev->data->dev_private;
+
+	for (i = 0; i < internals->nb_queues; i++) {
+		if (internals->fds[i] == -1)
+			continue;
+
+		close(internals->fds[i]);
+
+		/* RX and TX of a queue id share this descriptor */
+		internals->fds[i] = -1;
+		internals->rxq[i].fd = -1;
+		internals->txq[i].fd = -1;
+	}
+
+	dev->data->dev_link.link_status = ETH_LINK_DOWN;
+}
+
+/* dev_configure callback: nothing to configure, always reports success. */
+static int
+tap_dev_configure(struct rte_eth_dev *dev __rte_unused)
+{
+	return 0;
+}
+
+/*
+ * dev_infos_get callback: describe the port as a virtual device (no PCI
+ * device) with a single MAC address and as many RX/TX queues as the driver
+ * was built for.
+ */
+static void
+tap_dev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
+{
+	const struct pmd_internals *priv = dev->data->dev_private;
+	const uint16_t queues = priv->nb_queues;
+
+	dev_info->pci_dev = NULL;
+	dev_info->driver_name = drivername;
+	dev_info->if_index = priv->if_index;
+
+	dev_info->min_rx_bufsize = 0;
+	dev_info->max_rx_pktlen = (uint32_t)ETHER_MAX_VLAN_FRAME_LEN;
+	dev_info->max_mac_addrs = 1;
+
+	dev_info->max_rx_queues = queues;
+	dev_info->max_tx_queues = queues;
+}
+
+/*
+ * stats_get callback: copy the per-queue software counters into igb_stats
+ * (for at most RTE_ETHDEV_QUEUE_STAT_CNTRS queues) and fill the port totals
+ * with the sums of those per-queue values.
+ */
+static void
+tap_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *igb_stats)
+{
+	const struct pmd_internals *internal = dev->data->dev_private;
+	unsigned long in_pkts = 0, in_bytes = 0;
+	unsigned long out_pkts = 0, out_bytes = 0, out_errs = 0;
+	unsigned q, limit;
+
+	/* the per-queue arrays in rte_eth_stats have a fixed size */
+	limit = internal->nb_queues;
+	if (limit >= RTE_ETHDEV_QUEUE_STAT_CNTRS)
+		limit = RTE_ETHDEV_QUEUE_STAT_CNTRS;
+
+	for (q = 0; q < limit; q++) {
+		const struct pkt_stats *rx = &internal->rxq[q].stats;
+		const struct pkt_stats *tx = &internal->txq[q].stats;
+
+		igb_stats->q_ipackets[q] = rx->ipackets;
+		igb_stats->q_ibytes[q] = rx->ibytes;
+		in_pkts += igb_stats->q_ipackets[q];
+		in_bytes += igb_stats->q_ibytes[q];
+
+		igb_stats->q_opackets[q] = tx->opackets;
+		igb_stats->q_errors[q] = tx->errs;
+		igb_stats->q_obytes[q] = tx->obytes;
+		out_pkts += igb_stats->q_opackets[q];
+		out_errs += igb_stats->q_errors[q];
+		out_bytes += igb_stats->q_obytes[q];
+	}
+
+	igb_stats->ipackets = in_pkts;
+	igb_stats->ibytes = in_bytes;
+	igb_stats->opackets = out_pkts;
+	igb_stats->oerrors = out_errs;
+	igb_stats->obytes = out_bytes;
+}
+
+/*
+ * stats_reset callback: zero every counter of every RX and TX queue.
+ *
+ * The whole pkt_stats structure is cleared so that the RX error counter
+ * incremented by pmd_rx_burst() is reset together with the packet and byte
+ * counters.
+ */
+static void
+tap_stats_reset(struct rte_eth_dev *dev)
+{
+	int i;
+	struct pmd_internals *internal = dev->data->dev_private;
+
+	for (i = 0; i < internal->nb_queues; i++) {
+		memset(&internal->rxq[i].stats, 0,
+		       sizeof(internal->rxq[i].stats));
+		memset(&internal->txq[i].stats, 0,
+		       sizeof(internal->txq[i].stats));
+	}
+}
+
+/* dev_close callback: intentionally empty, no work is done here. */
+static void
+tap_dev_close(struct rte_eth_dev *dev __rte_unused)
+{
+}
+
+/*
+ * rx_queue_release callback: close the queue's descriptor, if any, and mark
+ * the queue as having none. A NULL queue is ignored.
+ *
+ * The driver uses -1 for "no descriptor", so any value >= 0 (including 0) is
+ * treated as open.
+ * NOTE(review): tap_setup_queue() stores the same descriptor in the RX and TX
+ * queue of a given id; releasing both queues closes that number twice.
+ */
+static void
+tap_rx_queue_release(void *queue)
+{
+	struct rx_queue *rxq = queue;
+
+	if (rxq && (rxq->fd >= 0)) {
+		close(rxq->fd);
+		rxq->fd = -1;
+	}
+}
+
+/*
+ * tx_queue_release callback: close the queue's descriptor, if any, and mark
+ * the queue as having none. A NULL queue is ignored.
+ *
+ * The driver uses -1 for "no descriptor", so any value >= 0 (including 0) is
+ * treated as open.
+ * NOTE(review): tap_setup_queue() stores the same descriptor in the RX and TX
+ * queue of a given id; releasing both queues closes that number twice.
+ */
+static void
+tap_tx_queue_release(void *queue)
+{
+	struct tx_queue *txq = queue;
+
+	if (txq && (txq->fd >= 0)) {
+		close(txq->fd);
+		txq->fd = -1;
+	}
+}
+
+/*
+ * link_update callback: does not query or modify anything and always
+ * returns 0.
+ */
+static int
+tap_link_update(struct rte_eth_dev *dev __rte_unused,
+		int wait_to_complete __rte_unused)
+{
+	return 0;
+}
+
+/*
+ * Bind RX and TX queue qid to one TUN/TAP descriptor.
+ *
+ * A descriptor already held by the RX queue is preferred, then one held by
+ * the TX queue; only when neither has one is a new one obtained from
+ * tun_alloc(). Both queues end up sharing the descriptor and are published
+ * in dev->data. Returns the descriptor, or -1 if tun_alloc() failed.
+ */
+static int
+tap_setup_queue(struct rte_eth_dev *dev,
+		struct pmd_internals *internals,
+		uint16_t qid)
+{
+	struct rx_queue *rxq = &internals->rxq[qid];
+	struct tx_queue *txq = &internals->txq[qid];
+	int fd = rxq->fd;
+
+	if (fd < 0)
+		fd = txq->fd;
+
+	if (fd < 0) {
+		RTE_LOG(INFO, PMD, "Add queue to TAP %s for qid %d\n",
+			dev->data->name, qid);
+
+		fd = tun_alloc(dev->data->name);
+		if (fd < 0) {
+			RTE_LOG(ERR, PMD, "tun_alloc(%s) failed\n", dev->data->name);
+			return -1;
+		}
+	}
+
+	dev->data->rx_queues[qid] = rxq;
+	dev->data->tx_queues[qid] = txq;
+
+	rxq->fd = fd;
+	txq->fd = fd;
+
+	return fd;
+}
+
+/*
+ * rx_queue_setup callback.
+ *
+ * Records the mempool and port id for the queue, checks that the mempool's
+ * data room (minus headroom) can hold ETH_FRAME_LEN bytes, then attaches a
+ * TUN/TAP descriptor through tap_setup_queue().
+ *
+ * Returns 0 on success, -ENOMEM when the mbufs are too small and -1 on a bad
+ * queue id, NULL mempool or descriptor allocation failure.
+ */
+static int
+tap_rx_queue_setup(struct rte_eth_dev *dev,
+		   uint16_t rx_queue_id,
+		   uint16_t nb_rx_desc __rte_unused,
+		   unsigned int socket_id __rte_unused,
+		   const struct rte_eth_rxconf *rx_conf __rte_unused,
+		   struct rte_mempool *mp)
+{
+	struct pmd_internals *internals = dev->data->dev_private;
+	struct rx_queue *rxq;
+	uint16_t room;
+	int fd;
+
+	if ((mp == NULL) || (rx_queue_id >= internals->nb_queues)) {
+		RTE_LOG(ERR, PMD, "nb_queues %d mp %p\n", internals->nb_queues, mp);
+		return -1;
+	}
+
+	rxq = &internals->rxq[rx_queue_id];
+	rxq->mp = mp;
+	rxq->in_port = dev->data->port_id;
+
+	/* Now get the space available for data in the mbuf */
+	room = (uint16_t) (rte_pktmbuf_data_room_size(mp) -
+			   RTE_PKTMBUF_HEADROOM);
+	if (room < ETH_FRAME_LEN) {
+		RTE_LOG(ERR, PMD,
+			"%s: %d bytes will not fit in mbuf (%d bytes)\n",
+			dev->data->name, ETH_FRAME_LEN, room);
+		return -ENOMEM;
+	}
+
+	fd = tap_setup_queue(dev, internals, rx_queue_id);
+	if (fd == -1)
+		return -1;
+
+	internals->fds[rx_queue_id] = fd;
+	RTE_LOG(INFO, PMD, "RX TAP device name %s, qid %d on fd %d\n",
+		dev->data->name, rx_queue_id, rxq->fd);
+
+	return 0;
+}
+
+/*
+ * tx_queue_setup callback.
+ *
+ * Attaches a TUN/TAP descriptor to the queue through tap_setup_queue() and
+ * records it in fds[], mirroring tap_rx_queue_setup().
+ *
+ * Returns 0 on success and -1 on a bad queue id or descriptor allocation
+ * failure. The descriptor itself is not returned: a positive return value
+ * would be read as a failure by callers that test for non-zero.
+ */
+static int
+tap_tx_queue_setup(struct rte_eth_dev *dev,
+		   uint16_t tx_queue_id,
+		   uint16_t nb_tx_desc __rte_unused,
+		   unsigned int socket_id __rte_unused,
+		   const struct rte_eth_txconf *tx_conf __rte_unused)
+{
+	struct pmd_internals *internals = dev->data->dev_private;
+	int fd;
+
+	if (tx_queue_id >= internals->nb_queues)
+		return -1;
+
+	fd = tap_setup_queue(dev, internals, tx_queue_id);
+	if (fd < 0)
+		return -1;
+
+	/* keep fds[] in sync so stop/uninit can close this descriptor */
+	internals->fds[tx_queue_id] = fd;
+	RTE_LOG(INFO, PMD, "TX TAP device name %s, qid %d on fd %d\n",
+		dev->data->name, tx_queue_id, internals->txq[tx_queue_id].fd);
+
+	return 0;
+}
+
+/* ethdev callback table installed on every port by pmd_init_internals() */
+static const struct eth_dev_ops ops = {
+	.dev_start              = tap_dev_start,
+	.dev_stop               = tap_dev_stop,
+	.dev_close              = tap_dev_close,
+	.dev_configure          = tap_dev_configure,
+	.dev_infos_get          = tap_dev_info,
+	.rx_queue_setup         = tap_rx_queue_setup,
+	.tx_queue_setup         = tap_tx_queue_setup,
+	.rx_queue_release       = tap_rx_queue_release,
+	.tx_queue_release       = tap_tx_queue_release,
+	.link_update            = tap_link_update,
+	.stats_get              = tap_stats_get,
+	.stats_reset            = tap_stats_reset,
+};
+
+/*
+ * Flags for rte_eth_dev_create(): keep the rte_eth_dev_data supplied by
+ * rte_eth_dev_allocate(), or replace it with a privately allocated copy.
+ */
+#define RTE_USE_GLOBAL_DATA	0x0000
+#define RTE_USE_PRIVATE_DATA	0x0001
+
+/*
+ * Give the host side of the tap interface and the DPDK port two distinct,
+ * recognisable MAC addresses.
+ *
+ * The host interface is set to "Tap-" followed by the port id and NUMA node;
+ * the address stored in addr for the application is "dnet" followed by the
+ * same two bytes. Returns 0 on success, -1 on bad arguments or ioctl failure.
+ */
+static int
+pmd_mac_address(int fd, struct rte_eth_dev *dev, struct ether_addr *addr)
+{
+	static const char host_tag[4] = { 'T', 'a', 'p', '-' };
+	static const char app_tag[4] = { 'd', 'n', 'e', 't' };
+	struct ifreq req;
+	char *mac;
+
+	if (fd <= 0 || dev == NULL || addr == NULL)
+		return -1;
+
+	memset(&req, 0, sizeof(req));
+	mac = req.ifr_hwaddr.sa_data;
+
+	if (ioctl(fd, SIOCGIFHWADDR, &req) == -1) {
+		RTE_LOG(ERR, PMD, "ioctl failed (SIOCGIFHWADDR) (%s)\n",
+			req.ifr_name);
+		return -1;
+	}
+
+	/* Set the host based MAC address to this special MAC format */
+	memcpy(mac, host_tag, sizeof(host_tag));
+	mac[4] = dev->data->port_id;
+	mac[5] = dev->data->numa_node;
+	if (ioctl(fd, SIOCSIFHWADDR, &req) == -1) {
+		RTE_LOG(ERR, PMD, "%s: ioctl failed (SIOCSIFHWADDR) (%s)\n",
+			dev->data->name, req.ifr_name);
+		return -1;
+	}
+
+	/*
+	 * The application side address must differ from the host one, so
+	 * only the four tag bytes are swapped before copying it out.
+	 */
+	memcpy(mac, app_tag, sizeof(app_tag));
+	mac[4] = dev->data->port_id;
+	mac[5] = dev->data->numa_node;
+	memcpy(addr, mac, ETH_ALEN);
+
+	return 0;
+}
+
+/*
+ * Allocate an ethdev port together with its driver-private area.
+ *
+ * name          - port name passed to rte_eth_dev_allocate()
+ * dev_type      - device type passed to rte_eth_dev_allocate()
+ * eth_dev       - out: the allocated port
+ * dev_ops       - callback table installed on the port
+ * internals     - out: zeroed private area of internal_size bytes
+ * internal_size - size of the private area, must not be 0
+ * flag          - RTE_USE_PRIVATE_DATA replaces dev->data with a private
+ *                 copy; RTE_USE_GLOBAL_DATA keeps the one already attached
+ *
+ * Both allocations are made on the caller's NUMA socket. Returns 0 on
+ * success and -1 on failure; on failure nothing allocated here is left
+ * behind and dev->data still points at its original storage.
+ */
+static int
+rte_eth_dev_create(const char *name, int dev_type,
+		   struct rte_eth_dev **eth_dev,
+		   const struct eth_dev_ops *dev_ops,
+		   void **internals, size_t internal_size,
+		   uint16_t flag)
+{
+	char buff[RTE_ETH_NAME_MAX_LEN];
+	int numa_node = rte_socket_id();
+	struct rte_eth_dev *dev = NULL;
+	struct rte_eth_dev_data *data = NULL;
+	void *priv = NULL;
+
+	if ((name == NULL) || (eth_dev == NULL) || (dev_ops == NULL) ||
+	    (internals == NULL) || (internal_size == 0)) {
+		RTE_PMD_DEBUG_TRACE("Parameters are invalid\n");
+		return -1;
+	}
+
+	dev = rte_eth_dev_allocate(name, dev_type);
+	if (dev == NULL) {
+		RTE_PMD_DEBUG_TRACE("%s: rte_eth_dev_allocate failed\n", name);
+		return -1;
+	}
+
+	/*
+	 * Allocate the private area first: dev->data is only replaced once
+	 * nothing can fail any more, so the error path never has to undo it.
+	 */
+	snprintf(buff, sizeof(buff), "I-%s-%d", name, numa_node);
+	priv = rte_zmalloc_socket(buff, internal_size, 0, numa_node);
+	if (priv == NULL) {
+		RTE_PMD_DEBUG_TRACE("Unable to allocate internal memory %zu\n",
+				    internal_size);
+		goto error;
+	}
+
+	if (flag & RTE_USE_PRIVATE_DATA) {
+		snprintf(buff, sizeof(buff), "D-%s-%d", name, numa_node);
+		data = rte_zmalloc_socket(buff, sizeof(struct rte_eth_dev_data),
+					  0, numa_node);
+		if (data == NULL) {
+			RTE_PMD_DEBUG_TRACE("%s: Unable to allocate memory\n",
+					    name);
+			goto error;
+		}
+		/* move the current state of the structure to the new one */
+		rte_memcpy(data, dev->data, sizeof(struct rte_eth_dev_data));
+		dev->data = data;	/* Override the current data pointer */
+	} else
+		data = dev->data;
+
+	/* Setup some default values */
+	dev->dev_ops = dev_ops;
+	dev->driver = NULL;
+	data->dev_private = priv;
+	data->dev_flags = RTE_ETH_DEV_DETACHABLE;
+	data->kdrv = RTE_KDRV_NONE;
+	data->numa_node = numa_node;
+
+	*eth_dev = dev;
+	*internals = priv;
+
+	return 0;
+error:
+	/* data is still NULL on every path that reaches this label */
+	rte_free(priv);
+	rte_eth_dev_release_port(dev);
+
+	return -1;
+}
+
+/*
+ * Create the ethdev port for one tap interface and open its first queue.
+ *
+ * name      - vdev name, used for logging only
+ * tap       - interface name and link speed to use
+ * internals - out: the port's private data
+ * eth_dev   - out: the new port
+ *
+ * The first TUN/TAP descriptor is stored in slot 0 of fds[], rxq[] and
+ * txq[]; every other slot is set to -1. Returns 0 on success. On failure -1
+ * is returned after closing the descriptor and freeing the private area and
+ * the private rte_eth_dev_data copy made for RTE_USE_PRIVATE_DATA.
+ */
+static int
+pmd_init_internals(const char *name, struct tap_info *tap,
+		   struct pmd_internals **internals,
+		   struct rte_eth_dev **eth_dev)
+{
+	struct rte_eth_dev *dev = NULL;
+	struct pmd_internals *internal = NULL;
+	struct rte_eth_dev_data *data = NULL;
+	int ret, i, fd = -1;
+
+	RTE_LOG(INFO, PMD,
+		"%s: Create TUN/TAP Ethernet device with %d queues on numa %u\n",
+		name, RTE_PMD_TAP_MAX_QUEUES, rte_socket_id());
+
+	pmd_link.link_speed = tap->speed;
+
+	ret = rte_eth_dev_create(tap->name, RTE_ETH_DEV_VIRTUAL, &dev, &ops,
+				 (void **)&internal, sizeof(struct pmd_internals),
+				 RTE_USE_PRIVATE_DATA);
+	if (ret < 0)
+		return -1;
+
+	/* snprintf() always terminates; strncpy() does not on truncation */
+	snprintf(internal->name, sizeof(internal->name), "%s", tap->name);
+
+	internal->nb_queues = RTE_PMD_TAP_MAX_QUEUES;
+
+	/* Presetup the fds to -1 as being not working */
+	for (i = 0; i < RTE_PMD_TAP_MAX_QUEUES; i++) {
+		internal->fds[i] = -1;
+		internal->rxq[i].fd = -1;
+		internal->txq[i].fd = -1;
+	}
+
+	/* Create the first Tap device */
+	fd = tun_alloc(dev->data->name);
+	if (fd < 0) {
+		RTE_LOG(ERR, PMD, "tun_alloc(%s) failed\n", dev->data->name);
+		goto error;
+	}
+
+	/* Take the TUN/TAP fd and place in the first location */
+	internal->rxq[0].fd = fd;
+	internal->txq[0].fd = fd;
+	internal->fds[0] = fd;
+
+	if (pmd_mac_address(fd, dev, &internal->eth_addr) < 0)
+		goto error;
+
+	data = dev->data;
+
+	data->dev_link = pmd_link;
+	data->mac_addrs = &internal->eth_addr;
+
+	data->nb_rx_queues = (uint16_t)internal->nb_queues;
+	data->nb_tx_queues = (uint16_t)internal->nb_queues;
+	data->drv_name = drivername;
+
+	*eth_dev = dev;
+	*internals = internal;
+
+	return 0;
+
+error:
+	if (fd >= 0)
+		close(fd);
+	/* same teardown order as pmd_tap_devuninit() */
+	rte_free(internal);
+	rte_free(dev->data);
+	rte_eth_dev_release_port(dev);
+
+	return -1;
+}
+
+/*
+ * Build a tap port through pmd_init_internals() and install the RX/TX burst
+ * handlers on it. Returns 0 on success, -1 if the port could not be built.
+ */
+static int
+eth_dev_tap_create(const char *name, struct tap_info *tap)
+{
+	struct rte_eth_dev *port = NULL;
+	struct pmd_internals *priv = NULL;
+	int rc;
+
+	rc = pmd_init_internals(name, tap, &priv, &port);
+	if (rc < 0)
+		return -1;
+
+	port->tx_pkt_burst = pmd_tx_burst;
+	port->rx_pkt_burst = pmd_rx_burst;
+
+	return 0;
+}
+
+/*
+ * kvargs handler for the "iface" key: store value as the interface name in
+ * the tap_info passed through extra_args. With a NULL value the name falls
+ * back to "dtap<N>", N being the unit number of the device being created.
+ * Always returns 0.
+ */
+static int
+set_interface_name(const char *key __rte_unused,
+		   const char *value,
+		   void *extra_args)
+{
+	struct tap_info *info = (struct tap_info *)extra_args;
+
+	if (value == NULL) {
+		/* tap_unit was already advanced for this device */
+		snprintf(info->name, sizeof(info->name), "dtap%d", (tap_unit - 1));
+		return 0;
+	}
+
+	snprintf(info->name, sizeof(info->name), "%s", value);
+	return 0;
+}
+
+/*
+ * kvargs handler for the "speed" key: parse value as a positive decimal link
+ * speed and store it in pmd_link and in the tap_info passed through
+ * extra_args. A NULL value selects ETH_SPEED_NUM_10G.
+ *
+ * Returns 0 on success, -1 when value is not a positive decimal number that
+ * fits in an int (atoi() would silently turn such input into 0).
+ */
+static int
+set_interface_speed(const char *key __rte_unused,
+		    const char *value,
+		    void *extra_args)
+{
+	struct tap_info *tap = (struct tap_info *)extra_args;
+	long speed = ETH_SPEED_NUM_10G;
+
+	if (value != NULL) {
+		char *end = NULL;
+
+		errno = 0;
+		speed = strtol(value, &end, 10);
+		if ((errno != 0) || (end == value) || (*end != '\0') ||
+		    (speed <= 0) || (speed > INT32_MAX)) {
+			RTE_LOG(ERR, PMD, "Invalid speed value '%s'\n", value);
+			return -1;
+		}
+	}
+
+	pmd_link.link_speed = speed;
+	tap->speed = pmd_link.link_speed;
+
+	return 0;
+}
+
+/*
+ * Open a TAP interface device.
+ *
+ * name   - vdev name given on the command line
+ * params - optional "iface=<string>,speed=N" argument string
+ *
+ * Starts from the defaults (name "dtap<unit>", 10G), overrides them with any
+ * recognised argument, then always creates the device. Arguments that fail
+ * to parse as a kvargs list are ignored and the defaults are used. Returns 0
+ * on success and a negative value if an argument handler or the device
+ * creation failed.
+ */
+static int
+pmd_tap_devinit(const char *name, const char *params)
+{
+	int ret = 0;
+	struct rte_kvargs *kvlist = NULL;
+	struct tap_info tap_info;
+
+	/* Setup default values */
+	memset(&tap_info, 0, sizeof(tap_info));
+
+	tap_info.speed = ETH_SPEED_NUM_10G;
+	/* same default name as set_interface_name() and the NIC guide */
+	snprintf(tap_info.name, sizeof(tap_info.name), "dtap%d", tap_unit++);
+
+	if ((params == NULL) || (params[0] == '\0')) {
+		RTE_LOG(INFO, PMD, "Initializing pmd_tap for %s\n", name);
+	} else {
+		RTE_LOG(INFO, PMD, "Initialize %s with params (%s)\n",
+			name, params);
+		kvlist = rte_kvargs_parse(params, valid_arguments);
+	}
+
+	if (kvlist != NULL) {
+		if (rte_kvargs_count(kvlist, ETH_TAP_SPEED_ARG) == 1) {
+			ret = rte_kvargs_process(kvlist, ETH_TAP_SPEED_ARG,
+						 &set_interface_speed, &tap_info);
+			if (ret < 0)
+				goto leave;
+		}
+
+		if (rte_kvargs_count(kvlist, ETH_TAP_IFACE_ARG) == 1) {
+			ret = rte_kvargs_process(kvlist, ETH_TAP_IFACE_ARG,
+						 &set_interface_name, &tap_info);
+			if (ret < 0)
+				goto leave;
+		}
+	}
+
+	/* the device is created on every path, with or without arguments */
+	ret = eth_dev_tap_create(name, &tap_info);
+
+leave:
+	if (kvlist != NULL)
+		rte_kvargs_free(kvlist);
+
+	if (ret < 0)
+		RTE_LOG(INFO, PMD, "Failed to create pmd_tap for %s\n", name);
+
+	return ret;
+}
+
+/*
+ * Detach a TAP device: close every descriptor recorded in fds[], free the
+ * port's private area and its rte_eth_dev_data, then release the port.
+ * A NULL or unknown name is not an error; the function always returns 0.
+ */
+static int
+pmd_tap_devuninit(const char *name)
+{
+	struct rte_eth_dev *port = NULL;
+	struct pmd_internals *priv;
+	int q;
+
+	RTE_LOG(INFO, PMD, "Closing TUN/TAP Ethernet device on numa %u\n",
+		rte_socket_id());
+
+	/* find the ethdev entry */
+	if (name != NULL)
+		port = rte_eth_dev_allocated(name);
+	if (port == NULL)
+		return 0;
+
+	priv = port->data->dev_private;
+	for (q = 0; q < priv->nb_queues; q++) {
+		if (priv->fds[q] == -1)
+			continue;
+		close(priv->fds[q]);
+	}
+
+	rte_free(priv);
+	rte_free(port->data);
+
+	rte_eth_dev_release_port(port);
+
+	return 0;
+}
+
+/* Virtual device driver descriptor: init/uninit entry points of this PMD */
+static struct rte_driver pmd_tap_drv = {
+	.type = PMD_VDEV,
+	.init = pmd_tap_devinit,
+	.uninit = pmd_tap_devuninit,
+};
+
+/* Register the driver as "eth_tap" and advertise the arguments it accepts */
+PMD_REGISTER_DRIVER(pmd_tap_drv, eth_tap);
+DRIVER_REGISTER_PARAM_STRING(eth_tap,
+			     "iface=<string>,speed=N");
diff --git a/drivers/net/tap/rte_pmd_tap_version.map b/drivers/net/tap/rte_pmd_tap_version.map
new file mode 100644
index 0000000..61463bf
--- /dev/null
+++ b/drivers/net/tap/rte_pmd_tap_version.map
@@ -0,0 +1,4 @@
+DPDK_16.11 {
+
+	local: *;
+};
-- 
2.8.0.GIT

^ permalink raw reply related	[flat|nested] 59+ messages in thread

* [PATCH 2/3] docs:tun/tap PMD information
  2016-09-15 14:10 [PATCH 1/3] drivers/net:new PMD using tun/tap host interface Keith Wiles
@ 2016-09-15 14:10 ` Keith Wiles
  2016-09-15 14:13   ` Wiles, Keith
  2016-09-21  2:00   ` [PATCH v3] drivers/net:new PMD using tun/tap host interface Keith Wiles
  2016-09-15 14:10 ` [PATCH 3/3] drivers/net:build support for new tap device driver Keith Wiles
  2016-09-16 16:22 ` [PATCH v2] drivers/net:new PMD using tun/tap host interface Keith Wiles
  2 siblings, 2 replies; 59+ messages in thread
From: Keith Wiles @ 2016-09-15 14:10 UTC (permalink / raw)
  To: dev

Signed-off-by: Keith Wiles <keith.wiles@intel.com>
---
 doc/guides/nics/tap.rst | 84 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 84 insertions(+)
 create mode 100644 doc/guides/nics/tap.rst

diff --git a/doc/guides/nics/tap.rst b/doc/guides/nics/tap.rst
new file mode 100644
index 0000000..072def8
--- /dev/null
+++ b/doc/guides/nics/tap.rst
@@ -0,0 +1,84 @@
+..  BSD LICENSE
+    Copyright(c) 2016 Intel Corporation. All rights reserved.
+    All rights reserved.
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions
+    are met:
+
+    * Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+    notice, this list of conditions and the following disclaimer in
+    the documentation and/or other materials provided with the
+    distribution.
+    * Neither the name of Intel Corporation nor the names of its
+    contributors may be used to endorse or promote products derived
+    from this software without specific prior written permission.
+
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+Tun/Tap Poll Mode Driver
+========================================
+
+The rte_eth_tap.c PMD creates a device using TUN/TAP interfaces on the local
+host. The PMD allows for DPDK and the host to communicate using a raw device
+interface on the host and in the DPDK application.
+
+The device created is a TAP device, which sends/receives packets in a raw format
+with an L2 header. The usage for a TAP PMD is for connectivity to the local host
+using a TAP interface. When the TAP PMD is initialized it will create a number
+of tap devices in the host, visible via the 'ifconfig -a' or 'ip' command. These
+commands can be used to configure and query the virtual device.
+
+These TAP interfaces can be used with wireshark or tcpdump or Pktgen-DPDK along
+with being able to be used as a network connection to the DPDK application. The
+method to enable one or more interfaces is to use the --vdev=eth_tap option on
+the DPDK application command line. Each --vdev=eth_tap option given will create
+an interface named dtap0, dtap1, ... and so forth.
+
+.. code-block:: console
+
+   The interface name can be changed by adding the iface=foo0 argument,
+   e.g. --vdev=eth_tap,iface=foo0 --vdev=eth_tap,iface=foo1, ...
+
+.. code-block:: console
+
+   Also the speed of the interface can be changed from 10G to whatever number
+   needed, but the interface does not enforce that speed.
+   e.g. --vdev=eth_tap,iface=foo0,speed=25000
+
+After the DPDK application is started you can send and receive packets on the
+interface using the standard rx_burst/tx_burst APIs in DPDK. From the host point
+of view you can use any host tool like tcpdump, wireshark, ping, Pktgen and
+others to communicate with the DPDK application. The DPDK application may not
+understand network protocols like IPv4/6, UDP or TCP unless the application has
+been written to understand these protocols.
+
+If you need the interface to act as a real network interface, meaning it is up
+and has a valid IP address, then you can do this with the following commands:
+
+.. code-block:: console
+
+   sudo ip link set dtap0 up; sudo ip addr add 192.168.0.250/24 dev dtap0
+   sudo ip link set dtap1 up; sudo ip addr add 192.168.1.250/24 dev dtap1
+
+Please change the IP addresses as you see fit.
+
+If routing is enabled on the host you can also communicate with the DPDK App
+over the internet via a standard socket layer application as long as you account
+for the protocol handling in the application.
+
+If you have a Network Stack in your DPDK application or something like it you
+can utilize that stack to handle the network protocols. Plus you would be able
+to address the interface using an IP address assigned to the internal interface.
-- 
2.8.0.GIT

^ permalink raw reply related	[flat|nested] 59+ messages in thread

* [PATCH 3/3] drivers/net:build support for new tap device driver
  2016-09-15 14:10 [PATCH 1/3] drivers/net:new PMD using tun/tap host interface Keith Wiles
  2016-09-15 14:10 ` [PATCH 2/3] docs:tun/tap PMD information Keith Wiles
@ 2016-09-15 14:10 ` Keith Wiles
  2016-09-16  7:36   ` Panu Matilainen
  2016-09-16 16:22 ` [PATCH v2] drivers/net:new PMD using tun/tap host interface Keith Wiles
  2 siblings, 1 reply; 59+ messages in thread
From: Keith Wiles @ 2016-09-15 14:10 UTC (permalink / raw)
  To: dev

Signed-off-by: Keith Wiles <keith.wiles@intel.com>
---
 config/common_linuxapp | 3 +++
 drivers/net/Makefile   | 1 +
 mk/rte.app.mk          | 1 +
 3 files changed, 5 insertions(+)

diff --git a/config/common_linuxapp b/config/common_linuxapp
index 2483dfa..704c01c 100644
--- a/config/common_linuxapp
+++ b/config/common_linuxapp
@@ -44,3 +44,6 @@ CONFIG_RTE_LIBRTE_PMD_VHOST=y
 CONFIG_RTE_LIBRTE_PMD_AF_PACKET=y
 CONFIG_RTE_LIBRTE_POWER=y
 CONFIG_RTE_VIRTIO_USER=y
+CONFIG_RTE_LIBRTE_PMD_TAP=y
+CONFIG_RTE_PMD_TAP_MAX_QUEUES=32
+
diff --git a/drivers/net/Makefile b/drivers/net/Makefile
index bc93230..b4afa98 100644
--- a/drivers/net/Makefile
+++ b/drivers/net/Makefile
@@ -55,6 +55,7 @@ DIRS-$(CONFIG_RTE_LIBRTE_THUNDERX_NICVF_PMD) += thunderx
 DIRS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio
 DIRS-$(CONFIG_RTE_LIBRTE_VMXNET3_PMD) += vmxnet3
 DIRS-$(CONFIG_RTE_LIBRTE_PMD_XENVIRT) += xenvirt
+DIRS-$(CONFIG_RTE_LIBRTE_PMD_TAP) += tap
 
 ifeq ($(CONFIG_RTE_LIBRTE_VHOST),y)
 DIRS-$(CONFIG_RTE_LIBRTE_PMD_VHOST) += vhost
diff --git a/mk/rte.app.mk b/mk/rte.app.mk
index 1a0095b..bd1d10f 100644
--- a/mk/rte.app.mk
+++ b/mk/rte.app.mk
@@ -129,6 +129,7 @@ ifeq ($(CONFIG_RTE_LIBRTE_VHOST),y)
 _LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_VHOST)      += -lrte_pmd_vhost
 endif # $(CONFIG_RTE_LIBRTE_VHOST)
 _LDLIBS-$(CONFIG_RTE_LIBRTE_VMXNET3_PMD)    += -lrte_pmd_vmxnet3_uio
+_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_TAP)        += -lrte_pmd_tap
 
 ifeq ($(CONFIG_RTE_LIBRTE_CRYPTODEV),y)
 _LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_AESNI_MB)   += -lrte_pmd_aesni_mb
-- 
2.8.0.GIT

^ permalink raw reply related	[flat|nested] 59+ messages in thread

* Re: [PATCH 2/3] docs:tun/tap PMD information
  2016-09-15 14:10 ` [PATCH 2/3] docs:tun/tap PMD information Keith Wiles
@ 2016-09-15 14:13   ` Wiles, Keith
  2016-09-15 14:15     ` Wiles, Keith
  2016-09-21  2:00   ` [PATCH v3] drivers/net:new PMD using tun/tap host interface Keith Wiles
  1 sibling, 1 reply; 59+ messages in thread
From: Wiles, Keith @ 2016-09-15 14:13 UTC (permalink / raw)
  To: dev

self Nak - just noticed the copyright notices are wrong. 

Regards,
Keith

> On Sep 15, 2016, at 9:10 AM, Keith Wiles <keith.wiles@intel.com> wrote:
> 
> Signed-off-by: Keith Wiles <keith.wiles@intel.com>
> ---
> doc/guides/nics/tap.rst | 84 +++++++++++++++++++++++++++++++++++++++++++++++++
> 1 file changed, 84 insertions(+)
> create mode 100644 doc/guides/nics/tap.rst
> 
> diff --git a/doc/guides/nics/tap.rst b/doc/guides/nics/tap.rst
> new file mode 100644
> index 0000000..072def8
> --- /dev/null
> +++ b/doc/guides/nics/tap.rst
> @@ -0,0 +1,84 @@
> +..  BSD LICENSE
> +    Copyright(c) 2016 Intel Corporation. All rights reserved.
> +    All rights reserved.
> +
> +    Redistribution and use in source and binary forms, with or without
> +    modification, are permitted provided that the following conditions
> +    are met:
> +
> +    * Redistributions of source code must retain the above copyright
> +    notice, this list of conditions and the following disclaimer.
> +    * Redistributions in binary form must reproduce the above copyright
> +    notice, this list of conditions and the following disclaimer in
> +    the documentation and/or other materials provided with the
> +    distribution.
> +    * Neither the name of Intel Corporation nor the names of its
> +    contributors may be used to endorse or promote products derived
> +    from this software without specific prior written permission.
> +
> +    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> +    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> +    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> +    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> +    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> +    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> +    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> +    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> +    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> +    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> +    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> +
> +Tun/Tap Poll Mode Driver
> +========================================
> +
> +The rte_eth_tap.c PMD creates a device using TUN/TAP interfaces on the local
> +host. The PMD allows for DPDK and the host to communicate using a raw device
> +interface on the host and in the DPDK application.
> +
> +The device created is a TAP device, which sends/receives packet in a raw format
> +with a L2 header. The usage for a TAP PMD is for connectivity to the local host
> +using a TAP interface. When the TAP PMD is initialized it will create a number
> +of tap devices in the host accessed via 'ifconfig -a' or 'ip' command. The
> +commands can be used to assign and query the virtual like device.
> +
> +These TAP interfaces can be used with wireshark or tcpdump or Pktgen-DPDK along
> +with being able to be used as a network connection to the DPDK application. The
> +method enable one or more interfaces is to use the --vdev=eth_tap option on the
> +DPDK application  command line. Each --vdev=eth_tap option give will create an
> +interface named dtap0, dtap1, ... and so forth.
> +
> +.. code-block:: console
> +
> +   The interfaced name can be changed by adding the iface=foo0
> +   e.g. --vedv=eth_tap,iface=foo0 --vdev=eth_tap,iface=foo1, ...
> +
> +.. code-block:: console
> +
> +   Also the speed of the interface can be changed from 10G to whatever number
> +   needed, but the interface does not enforce that speed.
> +   e.g. --vdev=eth_tap,iface=foo0,speed=25000
> +
> +After the DPDK application is started you can send and receive packets on the
> +interface using the standard rx_burst/tx_burst APIs in DPDK. From the host point
> +of view you can use any host tool like tcpdump, wireshark, ping, Pktgen and
> +others to communicate with the DPDK application. The DPDK application may not
> +understand network protocols like IPv4/6, UDP or TCP unless the application has
> +been written to understand these protocols.
> +
> +If you need the interface as a real network interface meaning running and has
> +a valid IP address then you can do this with the following commands:
> +
> +.. code-block:: console
> +
> +   sudo ip link set dtap0 up; sudo ip addr add 192.168.0.250/24 dev dtap0
> +   sudo ip link set dtap1 up; sudo ip addr add 192.168.1.250/24 dev dtap1
> +
> +Please change the IP addresses as you see fit.
> +
> +If routing is enabled on the host you can also communicate with the DPDK App
> +over the internet via a standard socket layer application as long as you account
> +for the protocol handing in the application.
> +
> +If you have a Network Stack in your DPDK application or something like it you
> +can utilize that stack to handle the network protocols. Plus you would be able
> +to address the interface using an IP address assigned to the internal interface.
> -- 
> 2.8.0.GIT
> 

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [PATCH 2/3] docs:tun/tap PMD information
  2016-09-15 14:13   ` Wiles, Keith
@ 2016-09-15 14:15     ` Wiles, Keith
  0 siblings, 0 replies; 59+ messages in thread
From: Wiles, Keith @ 2016-09-15 14:15 UTC (permalink / raw)
  To: dev


Regards,
Keith

> On Sep 15, 2016, at 9:13 AM, Wiles, Keith <keith.wiles@intel.com> wrote:
> 
> self Nak - just noticed the copyright notices are wrong. 

My mistake I was looking at the wrong file the headers appear correct.

> 
> Regards,
> Keith
> 
>> On Sep 15, 2016, at 9:10 AM, Keith Wiles <keith.wiles@intel.com> wrote:
>> 
>> Signed-off-by: Keith Wiles <keith.wiles@intel.com>
>> ---
>> doc/guides/nics/tap.rst | 84 +++++++++++++++++++++++++++++++++++++++++++++++++
>> 1 file changed, 84 insertions(+)
>> create mode 100644 doc/guides/nics/tap.rst
>> 
>> diff --git a/doc/guides/nics/tap.rst b/doc/guides/nics/tap.rst
>> new file mode 100644
>> index 0000000..072def8
>> --- /dev/null
>> +++ b/doc/guides/nics/tap.rst
>> @@ -0,0 +1,84 @@
>> +..  BSD LICENSE
>> +    Copyright(c) 2016 Intel Corporation. All rights reserved.
>> +    All rights reserved.
>> +
>> +    Redistribution and use in source and binary forms, with or without
>> +    modification, are permitted provided that the following conditions
>> +    are met:
>> +
>> +    * Redistributions of source code must retain the above copyright
>> +    notice, this list of conditions and the following disclaimer.
>> +    * Redistributions in binary form must reproduce the above copyright
>> +    notice, this list of conditions and the following disclaimer in
>> +    the documentation and/or other materials provided with the
>> +    distribution.
>> +    * Neither the name of Intel Corporation nor the names of its
>> +    contributors may be used to endorse or promote products derived
>> +    from this software without specific prior written permission.
>> +
>> +    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
>> +    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
>> +    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
>> +    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
>> +    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
>> +    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
>> +    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
>> +    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
>> +    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
>> +    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
>> +    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
>> +
>> +Tun/Tap Poll Mode Driver
>> +========================================
>> +
>> +The rte_eth_tap.c PMD creates a device using TUN/TAP interfaces on the local
>> +host. The PMD allows for DPDK and the host to communicate using a raw device
>> +interface on the host and in the DPDK application.
>> +
>> +The device created is a TAP device, which sends/receives packet in a raw format
>> +with a L2 header. The usage for a TAP PMD is for connectivity to the local host
>> +using a TAP interface. When the TAP PMD is initialized it will create a number
>> +of tap devices in the host accessed via 'ifconfig -a' or 'ip' command. The
>> +commands can be used to assign and query the virtual like device.
>> +
>> +These TAP interfaces can be used with wireshark or tcpdump or Pktgen-DPDK along
>> +with being able to be used as a network connection to the DPDK application. The
>> +method enable one or more interfaces is to use the --vdev=eth_tap option on the
>> +DPDK application  command line. Each --vdev=eth_tap option give will create an
>> +interface named dtap0, dtap1, ... and so forth.
>> +
>> +.. code-block:: console
>> +
>> +   The interfaced name can be changed by adding the iface=foo0
>> +   e.g. --vedv=eth_tap,iface=foo0 --vdev=eth_tap,iface=foo1, ...
>> +
>> +.. code-block:: console
>> +
>> +   Also the speed of the interface can be changed from 10G to whatever number
>> +   needed, but the interface does not enforce that speed.
>> +   e.g. --vdev=eth_tap,iface=foo0,speed=25000
>> +
>> +After the DPDK application is started you can send and receive packets on the
>> +interface using the standard rx_burst/tx_burst APIs in DPDK. From the host point
>> +of view you can use any host tool like tcpdump, wireshark, ping, Pktgen and
>> +others to communicate with the DPDK application. The DPDK application may not
>> +understand network protocols like IPv4/6, UDP or TCP unless the application has
>> +been written to understand these protocols.
>> +
>> +If you need the interface as a real network interface meaning running and has
>> +a valid IP address then you can do this with the following commands:
>> +
>> +.. code-block:: console
>> +
>> +   sudo ip link set dtap0 up; sudo ip addr add 192.168.0.250/24 dev dtap0
>> +   sudo ip link set dtap1 up; sudo ip addr add 192.168.1.250/24 dev dtap1
>> +
>> +Please change the IP addresses as you see fit.
>> +
>> +If routing is enabled on the host you can also communicate with the DPDK App
>> +over the internet via a standard socket layer application as long as you account
>> +for the protocol handing in the application.
>> +
>> +If you have a Network Stack in your DPDK application or something like it you
>> +can utilize that stack to handle the network protocols. Plus you would be able
>> +to address the interface using an IP address assigned to the internal interface.
>> -- 
>> 2.8.0.GIT
>> 
> 

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [PATCH 3/3] drivers/net:build support for new tap device driver
  2016-09-15 14:10 ` [PATCH 3/3] drivers/net:build support for new tap device driver Keith Wiles
@ 2016-09-16  7:36   ` Panu Matilainen
  2016-09-16 14:46     ` Wiles, Keith
  0 siblings, 1 reply; 59+ messages in thread
From: Panu Matilainen @ 2016-09-16  7:36 UTC (permalink / raw)
  To: Keith Wiles, dev

On 09/15/2016 05:10 PM, Keith Wiles wrote:
> Signed-off-by: Keith Wiles <keith.wiles@intel.com>
> ---
>  config/common_linuxapp | 3 +++
>  drivers/net/Makefile   | 1 +
>  mk/rte.app.mk          | 1 +
>  3 files changed, 5 insertions(+)
>
> diff --git a/config/common_linuxapp b/config/common_linuxapp
> index 2483dfa..704c01c 100644
> --- a/config/common_linuxapp
> +++ b/config/common_linuxapp
> @@ -44,3 +44,6 @@ CONFIG_RTE_LIBRTE_PMD_VHOST=y
>  CONFIG_RTE_LIBRTE_PMD_AF_PACKET=y
>  CONFIG_RTE_LIBRTE_POWER=y
>  CONFIG_RTE_VIRTIO_USER=y
> +CONFIG_RTE_LIBRTE_PMD_TAP=y
> +CONFIG_RTE_PMD_TAP_MAX_QUEUES=32
> +
> diff --git a/drivers/net/Makefile b/drivers/net/Makefile
> index bc93230..b4afa98 100644
> --- a/drivers/net/Makefile
> +++ b/drivers/net/Makefile
> @@ -55,6 +55,7 @@ DIRS-$(CONFIG_RTE_LIBRTE_THUNDERX_NICVF_PMD) += thunderx
>  DIRS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio
>  DIRS-$(CONFIG_RTE_LIBRTE_VMXNET3_PMD) += vmxnet3
>  DIRS-$(CONFIG_RTE_LIBRTE_PMD_XENVIRT) += xenvirt
> +DIRS-$(CONFIG_RTE_LIBRTE_PMD_TAP) += tap
>
>  ifeq ($(CONFIG_RTE_LIBRTE_VHOST),y)
>  DIRS-$(CONFIG_RTE_LIBRTE_PMD_VHOST) += vhost
> diff --git a/mk/rte.app.mk b/mk/rte.app.mk
> index 1a0095b..bd1d10f 100644
> --- a/mk/rte.app.mk
> +++ b/mk/rte.app.mk
> @@ -129,6 +129,7 @@ ifeq ($(CONFIG_RTE_LIBRTE_VHOST),y)
>  _LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_VHOST)      += -lrte_pmd_vhost
>  endif # $(CONFIG_RTE_LIBRTE_VHOST)
>  _LDLIBS-$(CONFIG_RTE_LIBRTE_VMXNET3_PMD)    += -lrte_pmd_vmxnet3_uio
> +_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_TAP)        += -lrte_pmd_tap
>
>  ifeq ($(CONFIG_RTE_LIBRTE_CRYPTODEV),y)
>  _LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_AESNI_MB)   += -lrte_pmd_aesni_mb
>

Splitting the Makefile and config changes into a separate patch makes no 
sense at all in this case. Just do it in the patch introducing the driver.

And actually, ditto for documentation.

	- Panu -

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [PATCH 3/3] drivers/net:build support for new tap device driver
  2016-09-16  7:36   ` Panu Matilainen
@ 2016-09-16 14:46     ` Wiles, Keith
  0 siblings, 0 replies; 59+ messages in thread
From: Wiles, Keith @ 2016-09-16 14:46 UTC (permalink / raw)
  To: Panu Matilainen; +Cc: dev


Regards,
Keith

> On Sep 16, 2016, at 2:36 AM, Panu Matilainen <pmatilai@redhat.com> wrote:
> 
> On 09/15/2016 05:10 PM, Keith Wiles wrote:
>> Signed-off-by: Keith Wiles <keith.wiles@intel.com>
>> ---
>> config/common_linuxapp | 3 +++
>> drivers/net/Makefile   | 1 +
>> mk/rte.app.mk          | 1 +
>> 3 files changed, 5 insertions(+)
>> 
>> diff --git a/config/common_linuxapp b/config/common_linuxapp
>> index 2483dfa..704c01c 100644
>> --- a/config/common_linuxapp
>> +++ b/config/common_linuxapp
>> @@ -44,3 +44,6 @@ CONFIG_RTE_LIBRTE_PMD_VHOST=y
>> CONFIG_RTE_LIBRTE_PMD_AF_PACKET=y
>> CONFIG_RTE_LIBRTE_POWER=y
>> CONFIG_RTE_VIRTIO_USER=y
>> +CONFIG_RTE_LIBRTE_PMD_TAP=y
>> +CONFIG_RTE_PMD_TAP_MAX_QUEUES=32
>> +
>> diff --git a/drivers/net/Makefile b/drivers/net/Makefile
>> index bc93230..b4afa98 100644
>> --- a/drivers/net/Makefile
>> +++ b/drivers/net/Makefile
>> @@ -55,6 +55,7 @@ DIRS-$(CONFIG_RTE_LIBRTE_THUNDERX_NICVF_PMD) += thunderx
>> DIRS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio
>> DIRS-$(CONFIG_RTE_LIBRTE_VMXNET3_PMD) += vmxnet3
>> DIRS-$(CONFIG_RTE_LIBRTE_PMD_XENVIRT) += xenvirt
>> +DIRS-$(CONFIG_RTE_LIBRTE_PMD_TAP) += tap
>> 
>> ifeq ($(CONFIG_RTE_LIBRTE_VHOST),y)
>> DIRS-$(CONFIG_RTE_LIBRTE_PMD_VHOST) += vhost
>> diff --git a/mk/rte.app.mk b/mk/rte.app.mk
>> index 1a0095b..bd1d10f 100644
>> --- a/mk/rte.app.mk
>> +++ b/mk/rte.app.mk
>> @@ -129,6 +129,7 @@ ifeq ($(CONFIG_RTE_LIBRTE_VHOST),y)
>> _LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_VHOST)      += -lrte_pmd_vhost
>> endif # $(CONFIG_RTE_LIBRTE_VHOST)
>> _LDLIBS-$(CONFIG_RTE_LIBRTE_VMXNET3_PMD)    += -lrte_pmd_vmxnet3_uio
>> +_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_TAP)        += -lrte_pmd_tap
>> 
>> ifeq ($(CONFIG_RTE_LIBRTE_CRYPTODEV),y)
>> _LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_AESNI_MB)   += -lrte_pmd_aesni_mb
>> 
> 
> Splitting the Makefile and config changes into a separate patch makes no sense at all in this case. Just do it in the patch introducing the driver.
> 
> And actually, ditto for documentation.

OK, will do, but I thought other patches followed something like this as I had thought I was following the normal process. Without some place defining the real process it is hard to decide the real way to send patches. :-(

I will send a new single patch later today with one little typo I fixed.

> 
> 	- Panu -

^ permalink raw reply	[flat|nested] 59+ messages in thread

* [PATCH v2] drivers/net:new PMD using tun/tap host interface
  2016-09-15 14:10 [PATCH 1/3] drivers/net:new PMD using tun/tap host interface Keith Wiles
  2016-09-15 14:10 ` [PATCH 2/3] docs:tun/tap PMD information Keith Wiles
  2016-09-15 14:10 ` [PATCH 3/3] drivers/net:build support for new tap device driver Keith Wiles
@ 2016-09-16 16:22 ` Keith Wiles
  2016-09-18 13:25   ` Yuanhan Liu
  2016-09-20  4:05   ` Yuanhan Liu
  2 siblings, 2 replies; 59+ messages in thread
From: Keith Wiles @ 2016-09-16 16:22 UTC (permalink / raw)
  To: dev; +Cc: pmatilai

The rte_eth_tap.c PMD creates a device using TUN/TAP interfaces
on the local host. The PMD allows for DPDK and the host to
communicate using a raw device interface on the host and in
the DPDK application. The device created is a Tap device with
a L2 packet header.

v2 - merge all of the patches into one patch.
     Fix a typo on naming the tap device.
     Update the maintainers list

Signed-off-by: Keith Wiles <keith.wiles@intel.com>
---
 MAINTAINERS                             |   5 +
 config/common_linuxapp                  |   2 +
 doc/guides/nics/tap.rst                 |  84 +++
 drivers/net/Makefile                    |   1 +
 drivers/net/tap/Makefile                |  60 +++
 drivers/net/tap/rte_eth_tap.c           | 872 ++++++++++++++++++++++++++++++++
 drivers/net/tap/rte_pmd_tap_version.map |   4 +
 mk/rte.app.mk                           |   1 +
 8 files changed, 1029 insertions(+)
 create mode 100644 doc/guides/nics/tap.rst
 create mode 100644 drivers/net/tap/Makefile
 create mode 100644 drivers/net/tap/rte_eth_tap.c
 create mode 100644 drivers/net/tap/rte_pmd_tap_version.map

diff --git a/MAINTAINERS b/MAINTAINERS
index 7c33ad4..fad74e4 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -392,6 +392,11 @@ F: doc/guides/nics/pcap_ring.rst
 F: app/test/test_pmd_ring.c
 F: app/test/test_pmd_ring_perf.c
 
+Tap PMD
+M: Keith Wiles <keith.wiles@intel.com>
+F: drivers/net/tap
+F: doc/guides/nics/tap.rst
+
 Null Networking PMD
 M: Tetsuya Mukawa <mtetsuyah@gmail.com>
 F: drivers/net/null/
diff --git a/config/common_linuxapp b/config/common_linuxapp
index 2483dfa..59a2053 100644
--- a/config/common_linuxapp
+++ b/config/common_linuxapp
@@ -44,3 +44,5 @@ CONFIG_RTE_LIBRTE_PMD_VHOST=y
 CONFIG_RTE_LIBRTE_PMD_AF_PACKET=y
 CONFIG_RTE_LIBRTE_POWER=y
 CONFIG_RTE_VIRTIO_USER=y
+CONFIG_RTE_LIBRTE_PMD_TAP=y
+CONFIG_RTE_PMD_TAP_MAX_QUEUES=32
diff --git a/doc/guides/nics/tap.rst b/doc/guides/nics/tap.rst
new file mode 100644
index 0000000..072def8
--- /dev/null
+++ b/doc/guides/nics/tap.rst
@@ -0,0 +1,84 @@
+..  BSD LICENSE
+    Copyright(c) 2016 Intel Corporation. All rights reserved.
+    All rights reserved.
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions
+    are met:
+
+    * Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+    notice, this list of conditions and the following disclaimer in
+    the documentation and/or other materials provided with the
+    distribution.
+    * Neither the name of Intel Corporation nor the names of its
+    contributors may be used to endorse or promote products derived
+    from this software without specific prior written permission.
+
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+Tun/Tap Poll Mode Driver
+========================================
+
+The rte_eth_tap.c PMD creates a device using TUN/TAP interfaces on the local
+host. The PMD allows for DPDK and the host to communicate using a raw device
+interface on the host and in the DPDK application.
+
+The device created is a TAP device, which sends/receives packet in a raw format
+with a L2 header. The usage for a TAP PMD is for connectivity to the local host
+using a TAP interface. When the TAP PMD is initialized it will create a number
+of tap devices in the host accessed via 'ifconfig -a' or 'ip' command. The
+commands can be used to assign and query the virtual like device.
+
+These TAP interfaces can be used with wireshark or tcpdump or Pktgen-DPDK along
+with being able to be used as a network connection to the DPDK application. The
+method to enable one or more interfaces is to use the --vdev=eth_tap option on
+the DPDK application command line. Each --vdev=eth_tap option given will create an
+interface named dtap0, dtap1, ... and so forth.
+
+.. code-block:: console
+
+   The interface name can be changed by adding the iface=foo0
+   e.g. --vdev=eth_tap,iface=foo0 --vdev=eth_tap,iface=foo1, ...
+
+.. code-block:: console
+
+   Also the speed of the interface can be changed from 10G to whatever number
+   needed, but the interface does not enforce that speed.
+   e.g. --vdev=eth_tap,iface=foo0,speed=25000
+
+After the DPDK application is started you can send and receive packets on the
+interface using the standard rx_burst/tx_burst APIs in DPDK. From the host point
+of view you can use any host tool like tcpdump, wireshark, ping, Pktgen and
+others to communicate with the DPDK application. The DPDK application may not
+understand network protocols like IPv4/6, UDP or TCP unless the application has
+been written to understand these protocols.
+
+If you need the interface to act as a real network interface, meaning it is up
+and has a valid IP address, then you can do this with the following commands:
+
+.. code-block:: console
+
+   sudo ip link set dtap0 up; sudo ip addr add 192.168.0.250/24 dev dtap0
+   sudo ip link set dtap1 up; sudo ip addr add 192.168.1.250/24 dev dtap1
+
+Please change the IP addresses as you see fit.
+
+If routing is enabled on the host you can also communicate with the DPDK App
+over the internet via a standard socket layer application as long as you account
+for the protocol handling in the application.
+
+If you have a Network Stack in your DPDK application or something like it you
+can utilize that stack to handle the network protocols. Plus you would be able
+to address the interface using an IP address assigned to the internal interface.
diff --git a/drivers/net/Makefile b/drivers/net/Makefile
index bc93230..b4afa98 100644
--- a/drivers/net/Makefile
+++ b/drivers/net/Makefile
@@ -55,6 +55,7 @@ DIRS-$(CONFIG_RTE_LIBRTE_THUNDERX_NICVF_PMD) += thunderx
 DIRS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio
 DIRS-$(CONFIG_RTE_LIBRTE_VMXNET3_PMD) += vmxnet3
 DIRS-$(CONFIG_RTE_LIBRTE_PMD_XENVIRT) += xenvirt
+DIRS-$(CONFIG_RTE_LIBRTE_PMD_TAP) += tap
 
 ifeq ($(CONFIG_RTE_LIBRTE_VHOST),y)
 DIRS-$(CONFIG_RTE_LIBRTE_PMD_VHOST) += vhost
diff --git a/drivers/net/tap/Makefile b/drivers/net/tap/Makefile
new file mode 100644
index 0000000..442a2fe
--- /dev/null
+++ b/drivers/net/tap/Makefile
@@ -0,0 +1,60 @@
+#   BSD LICENSE
+#
+#   Copyright(c) 2014 John W. Linville <linville@redhat.com>
+#   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+#   Copyright(c) 2014 6WIND S.A.
+#   All rights reserved.
+#
+#   Redistribution and use in source and binary forms, with or without
+#   modification, are permitted provided that the following conditions
+#   are met:
+#
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice, this list of conditions and the following disclaimer in
+#       the documentation and/or other materials provided with the
+#       distribution.
+#     * Neither the name of Intel Corporation nor the names of its
+#       contributors may be used to endorse or promote products derived
+#       from this software without specific prior written permission.
+#
+#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+#   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+#   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+#   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+#   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+#
+# library name
+#
+LIB = librte_pmd_tap.a
+
+EXPORT_MAP := rte_pmd_tap_version.map
+
+LIBABIVER := 1
+
+CFLAGS += -O3
+CFLAGS += $(WERROR_FLAGS)
+
+#
+# all source are stored in SRCS-y
+#
+SRCS-$(CONFIG_RTE_LIBRTE_PMD_TAP) += rte_eth_tap.c
+
+# this lib depends upon:
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_TAP) += lib/librte_eal
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_TAP) += lib/librte_mbuf
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_TAP) += lib/librte_mempool
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_TAP) += lib/librte_ether
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_TAP) += lib/librte_kvargs
+
+include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/drivers/net/tap/rte_eth_tap.c b/drivers/net/tap/rte_eth_tap.c
new file mode 100644
index 0000000..4621c55
--- /dev/null
+++ b/drivers/net/tap/rte_eth_tap.c
@@ -0,0 +1,872 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2016 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <rte_mbuf.h>
+#include <rte_ethdev.h>
+#include <rte_malloc.h>
+#include <rte_kvargs.h>
+#include <rte_dev.h>
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/socket.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <unistd.h>
+#include <poll.h>
+#include <arpa/inet.h>
+#include <net/if.h>
+#ifdef __linux__
+#include <linux/if.h>
+#include <linux/if_tun.h>
+#include <linux/if_ether.h>
+#else
+#include <netinet/if_ether.h>
+#endif
+#include <fcntl.h>
+
+#include <poll.h>
+
+/* Linux based path to the TUN device */
+#define TUN_TAP_DEV_PATH        "/dev/net/tun"
+
+#define ETH_TAP_IFACE_ARG       "iface"
+#define ETH_TAP_SPEED_ARG       "speed"
+
+/* NULL-terminated list of the kvargs keys handed to rte_kvargs_parse() */
+static const char *valid_arguments[] = {
+	ETH_TAP_IFACE_ARG,
+	ETH_TAP_SPEED_ARG,
+	NULL
+};
+
+/* Driver name stored in the device data and reported by tap_dev_info() */
+static const char *drivername = "Tap PMD";
+/* Unit number used to build the next default "dtap<N>" interface name */
+static int tap_unit = 0;
+
+/*
+ * Link template copied into every new port; link_speed is overwritten with
+ * the configured speed before the copy is made.
+ */
+static struct rte_eth_link pmd_link = {
+	.link_speed = ETH_SPEED_NUM_10G,
+	.link_duplex = ETH_LINK_FULL_DUPLEX,
+	.link_status = ETH_LINK_DOWN,
+	/*
+	 * link_autoneg takes ETH_LINK_FIXED/ETH_LINK_AUTONEG; the
+	 * ETH_LINK_SPEED_* flags belong to the link_speeds bitmap.
+	 */
+	.link_autoneg = ETH_LINK_FIXED
+};
+
+/* Settings gathered from the device arguments before the port is created */
+struct tap_info {
+	char name[RTE_ETH_NAME_MAX_LEN]; /* Interface name supplied/given */
+	int speed;			 /* Speed of interface */
+};
+
+/* Software counters kept per queue (one instance per RX and per TX queue) */
+struct pkt_stats {
+	uint64_t opackets;		/* Number of output packets */
+	uint64_t ipackets;		/* Number of input packets */
+	uint64_t obytes;		/* Number of bytes on output */
+	uint64_t ibytes;		/* Number of bytes on input */
+	uint64_t errs;			/* Number of error packets */
+};
+
+/* Per-queue receive state handed to pmd_rx_burst() */
+struct rx_queue {
+	struct rte_mempool *mp;		/* Mempool for RX packets */
+	uint16_t in_port;		/* Port ID */
+	int fd;				/* Tap file descriptor, -1 when unset */
+
+	struct pkt_stats stats;		/* Stats for this RX queue */
+};
+
+/* Per-queue transmit state handed to pmd_tx_burst() */
+struct tx_queue {
+	int fd;				/* Tap file descriptor, -1 when unset */
+	struct pkt_stats stats;		/* Stats for this TX queue */
+};
+
+/* Driver private data, stored in rte_eth_dev_data::dev_private */
+struct pmd_internals {
+	char name[RTE_ETH_NAME_MAX_LEN];	/* Internal Tap device name */
+	uint16_t nb_queues;			/* Number of queues supported */
+	uint16_t pad0;
+	struct ether_addr eth_addr;	/* Mac address of the device port */
+
+	int if_index;			/* IF_INDEX for the port */
+	int fds[RTE_PMD_TAP_MAX_QUEUES]; /* List of all file descriptors */
+
+	struct rx_queue rxq[RTE_PMD_TAP_MAX_QUEUES];	/* List of RX queues */
+	struct tx_queue txq[RTE_PMD_TAP_MAX_QUEUES];	/* List of TX queues */
+};
+
+/*
+ * Tun/Tap allocation routine
+ *
+ * Opens the TUN/TAP clone device and attaches a TAP interface (no packet
+ * information header) to it.
+ *
+ * name is the name of the interface to use; pass NULL or an empty string to
+ * take the host supplied name. When name is not NULL, the name reported back
+ * by the host is written into it, so the buffer must hold at least IFNAMSIZ
+ * bytes.
+ *
+ * Returns the non-blocking file descriptor on success, -1 on failure.
+ */
+static int
+tun_alloc(char *name)
+{
+	struct ifreq ifr;
+	unsigned int features;
+	int fd;
+
+	memset(&ifr, 0, sizeof(struct ifreq));
+
+	ifr.ifr_flags = IFF_TAP | IFF_NO_PI;
+	/* snprintf() always terminates, unlike strncpy() on a full-size name */
+	if (name && name[0])
+		snprintf(ifr.ifr_name, IFNAMSIZ, "%s", name);
+
+	fd = open(TUN_TAP_DEV_PATH, O_RDWR);
+	if (fd < 0) {
+		RTE_LOG(ERR, PMD, "Unable to create TAP interface\n");
+		goto error;
+	}
+
+	/* Grab the TUN features to verify we can work */
+	if (ioctl(fd, TUNGETFEATURES, &features) < 0) {
+		RTE_LOG(ERR, PMD, "Unable to get TUN/TAP features\n");
+		goto error;
+	}
+	RTE_LOG(DEBUG, PMD, "TUN/TAP Features %08x\n", features);
+
+	if (!(features & IFF_MULTI_QUEUE) && (RTE_PMD_TAP_MAX_QUEUES > 1)) {
+		/* Fatal for this driver build, so report it as an error */
+		RTE_LOG(ERR, PMD, "TUN/TAP device only one queue\n");
+		goto error;
+	} else if ((features & IFF_ONE_QUEUE) && (RTE_PMD_TAP_MAX_QUEUES == 1)) {
+		ifr.ifr_flags |= IFF_ONE_QUEUE;
+		RTE_LOG(DEBUG, PMD, "Single queue only support\n");
+	} else {
+		ifr.ifr_flags |= IFF_MULTI_QUEUE;
+		RTE_LOG(DEBUG, PMD, "Multi-queue support for %d queues\n",
+			RTE_PMD_TAP_MAX_QUEUES);
+	}
+
+	/* Set the TUN/TAP configuration and get the name if needed */
+	if (ioctl(fd, TUNSETIFF, (void *)&ifr) < 0) {
+		RTE_LOG(ERR, PMD, "Unable to set TUNSETIFF for %s\n", ifr.ifr_name);
+		perror("TUNSETIFF");
+		goto error;
+	}
+
+	/* Always set the file descriptor to non-blocking */
+	if (fcntl(fd, F_SETFL, O_NONBLOCK) < 0) {
+		RTE_LOG(ERR, PMD, "Unable to set to nonblocking\n");
+		perror("F_SETFL, NONBLOCK");
+		goto error;
+	}
+
+	/* If the host picked a different name, hand it back to the caller */
+	if (name && strcmp(name, ifr.ifr_name))
+		snprintf(name, IFNAMSIZ, "%s", ifr.ifr_name);
+
+	return fd;
+
+error:
+	/* 0 is a valid descriptor, only a negative value means "not opened" */
+	if (fd >= 0)
+		close(fd);
+	return -1;
+}
+
+/*
+ * RX burst callback: pull up to nb_pkts frames from the tap file descriptor
+ * of the given RX queue, one freshly allocated mbuf per frame, and return
+ * the number of mbufs stored in bufs.
+ */
+static uint16_t
+pmd_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
+{
+	struct rx_queue *rxq = queue;
+	struct pollfd pfd = { .fd = rxq->fd, .events = POLLIN };
+	unsigned long bytes = 0;
+	uint16_t count = 0;
+
+	while (count < nb_pkts) {
+		struct rte_mbuf *pkt;
+		int got;
+
+		/* Zero timeout: stop as soon as nothing is pending */
+		if (poll(&pfd, 1, 0) <= 0)
+			break;
+
+		if (pfd.revents == 0)
+			continue;
+
+		if (pfd.revents & POLLERR) {
+			rxq->stats.errs++;
+			RTE_LOG(ERR, PMD, "Packet Error\n");
+			break;
+		}
+		if (pfd.revents & POLLHUP)
+			RTE_LOG(ERR, PMD, "Peer closed connection\n");
+
+		/* One mbuf per frame read from the descriptor */
+		pkt = rte_pktmbuf_alloc(rxq->mp);
+		if (unlikely(pkt == NULL)) {
+			RTE_LOG(ERR, PMD, "Unable to allocate mbuf\n");
+			break;
+		}
+
+		got = read(pfd.fd, rte_pktmbuf_mtod(pkt, char *),
+			   rte_pktmbuf_tailroom(pkt));
+		if (got <= 0) {
+			RTE_LOG(ERR, PMD, "len %d\n", got);
+			rte_pktmbuf_free(pkt);
+			break;
+		}
+
+		pkt->data_len = got;
+		pkt->pkt_len = got;
+		pkt->port = rxq->in_port;
+
+		/* Hand the frame to the caller and account for it */
+		bufs[count++] = pkt;
+		bytes += pkt->pkt_len;
+	}
+
+	rxq->stats.ipackets += count;
+	rxq->stats.ibytes += bytes;
+
+	return count;
+}
+
+/*
+ * TX burst callback: write up to nb_pkts frames to the tap file descriptor
+ * of the given TX queue. Every frame that was written is freed; the return
+ * value is the number of frames consumed from the front of bufs.
+ *
+ * NOTE(review): write() is given the first segment's data pointer together
+ * with the whole packet length - confirm callers never pass chained mbufs.
+ */
+static uint16_t
+pmd_tx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
+{
+	struct tx_queue *txq = queue;
+	struct pollfd pfd;
+	unsigned long bytes_sent = 0;
+	uint16_t sent = 0;
+	int attempt;
+
+	if (unlikely(nb_pkts == 0))
+		return 0;
+
+	pfd.fd = txq->fd;
+	pfd.events = POLLOUT;
+
+	/* At most one poll()/write() attempt per requested packet */
+	for (attempt = 0; attempt < nb_pkts; attempt++) {
+		struct rte_mbuf *pkt;
+		int written;
+
+		if (poll(&pfd, 1, 0) <= 0)
+			break;
+
+		/* Something other than "writable" was reported: try again */
+		if (!(pfd.revents & POLLOUT))
+			continue;
+
+		/* copy the tx frame data */
+		pkt = bufs[sent];
+		written = write(pfd.fd, rte_pktmbuf_mtod(pkt, void*),
+				rte_pktmbuf_pkt_len(pkt));
+		if (written <= 0)
+			break;
+
+		sent++;
+		bytes_sent += pkt->pkt_len;
+		rte_pktmbuf_free(pkt);
+	}
+
+	/* Frames that were not written are accounted as errors */
+	txq->stats.opackets += sent;
+	txq->stats.errs += nb_pkts - sent;
+	txq->stats.obytes += bytes_sent;
+
+	return sent;
+}
+
+/*
+ * Start callback: nothing is done on the host side here, the link is only
+ * marked as up. Always returns 0.
+ */
+static int
+tap_dev_start(struct rte_eth_dev *dev)
+{
+	/* Force the Link up */
+	dev->data->dev_link.link_status = ETH_LINK_UP;
+
+	return 0;
+}
+
+/*
+ * This function gets called when the current port gets stopped.
+ *
+ * Every open tap descriptor is closed and forgotten, then the link is
+ * reported as down.
+ */
+static void
+tap_dev_stop(struct rte_eth_dev *dev)
+{
+	int i;
+	struct pmd_internals *internals = dev->data->dev_private;
+
+	for (i = 0; i < internals->nb_queues; i++) {
+		if (internals->fds[i] == -1)
+			continue;
+
+		close(internals->fds[i]);
+
+		/*
+		 * The same descriptor is also stored in the RX and TX queue of
+		 * this index; invalidate every copy so that a later queue
+		 * release or device removal does not close it a second time.
+		 */
+		internals->fds[i] = -1;
+		internals->rxq[i].fd = -1;
+		internals->txq[i].fd = -1;
+	}
+
+	dev->data->dev_link.link_status = ETH_LINK_DOWN;
+}
+
+/* Configure callback: nothing to configure, always reports success (0). */
+static int
+tap_dev_configure(struct rte_eth_dev *dev __rte_unused)
+{
+	return 0;
+}
+
+/*
+ * Device info callback: describe the port (driver name, interface index,
+ * queue limits and frame size limits) in dev_info.
+ */
+static void
+tap_dev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
+{
+	const struct pmd_internals *pmd = dev->data->dev_private;
+	const uint16_t queues = pmd->nb_queues;
+
+	/* Identity of the port */
+	dev_info->driver_name = drivername;
+	dev_info->pci_dev = NULL;
+	dev_info->if_index = pmd->if_index;
+
+	/* Limits: as many RX as TX queues, a single MAC address */
+	dev_info->max_rx_queues = queues;
+	dev_info->max_tx_queues = queues;
+	dev_info->max_mac_addrs = 1;
+	dev_info->max_rx_pktlen = (uint32_t)ETHER_MAX_VLAN_FRAME_LEN;
+	dev_info->min_rx_bufsize = 0;
+}
+
+/*
+ * Stats callback: copy the per-queue software counters into igb_stats and
+ * fill in the port totals.
+ *
+ * Per-queue entries exist only for the first RTE_ETHDEV_QUEUE_STAT_CNTRS
+ * queues, but the port totals are summed over every queue.
+ */
+static void
+tap_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *igb_stats)
+{
+	unsigned i;
+	/* 64-bit accumulators: the counters themselves are uint64_t */
+	uint64_t rx_total = 0, rx_bytes_total = 0, rx_err_total = 0;
+	uint64_t tx_total = 0, tx_bytes_total = 0, tx_err_total = 0;
+	const struct pmd_internals *internal = dev->data->dev_private;
+
+	for (i = 0; i < internal->nb_queues; i++) {
+		const struct pkt_stats *rx = &internal->rxq[i].stats;
+		const struct pkt_stats *tx = &internal->txq[i].stats;
+
+		if (i < RTE_ETHDEV_QUEUE_STAT_CNTRS) {
+			igb_stats->q_ipackets[i] = rx->ipackets;
+			igb_stats->q_ibytes[i] = rx->ibytes;
+			igb_stats->q_opackets[i] = tx->opackets;
+			igb_stats->q_errors[i] = tx->errs;
+			igb_stats->q_obytes[i] = tx->obytes;
+		}
+
+		rx_total += rx->ipackets;
+		rx_bytes_total += rx->ibytes;
+		rx_err_total += rx->errs;
+		tx_total += tx->opackets;
+		tx_err_total += tx->errs;
+		tx_bytes_total += tx->obytes;
+	}
+
+	igb_stats->ipackets = rx_total;
+	igb_stats->ibytes = rx_bytes_total;
+	igb_stats->ierrors = rx_err_total;
+	igb_stats->opackets = tx_total;
+	igb_stats->oerrors = tx_err_total;
+	igb_stats->obytes = tx_bytes_total;
+}
+
+/*
+ * Stats reset callback: clear every software counter of every RX and TX
+ * queue, including the RX error counter.
+ */
+static void
+tap_stats_reset(struct rte_eth_dev *dev)
+{
+	int i;
+	struct pmd_internals *internal = dev->data->dev_private;
+
+	for (i = 0; i < internal->nb_queues; i++) {
+		/* Wipe the whole structure so no counter can be forgotten */
+		memset(&internal->rxq[i].stats, 0,
+		       sizeof(internal->rxq[i].stats));
+		memset(&internal->txq[i].stats, 0,
+		       sizeof(internal->txq[i].stats));
+	}
+}
+
+/* Close callback: intentionally empty, nothing is released here. */
+static void
+tap_dev_close(struct rte_eth_dev *dev __rte_unused)
+{
+}
+
+/*
+ * RX queue release callback: close the queue's tap descriptor, if any, and
+ * mark the queue as having none. A NULL queue is ignored.
+ *
+ * NOTE(review): tap_setup_queue() stores the same descriptor in the RX and
+ * the TX queue of a given index, so releasing both sides closes the same
+ * descriptor number twice - confirm and fix at the owner level.
+ */
+static void
+tap_rx_queue_release(void *queue)
+{
+	struct rx_queue *rxq = queue;
+
+	/* -1 is the "unset" marker; 0 is a perfectly valid descriptor */
+	if (rxq && (rxq->fd >= 0)) {
+		close(rxq->fd);
+		rxq->fd = -1;
+	}
+}
+
+/*
+ * TX queue release callback: close the queue's tap descriptor, if any, and
+ * mark the queue as having none. A NULL queue is ignored.
+ *
+ * NOTE(review): tap_setup_queue() stores the same descriptor in the RX and
+ * the TX queue of a given index, so releasing both sides closes the same
+ * descriptor number twice - confirm and fix at the owner level.
+ */
+static void
+tap_tx_queue_release(void *queue)
+{
+	struct tx_queue *txq = queue;
+
+	/* -1 is the "unset" marker; 0 is a perfectly valid descriptor */
+	if (txq && (txq->fd >= 0)) {
+		close(txq->fd);
+		txq->fd = -1;
+	}
+}
+
+/* Link update callback: the link state is not refreshed, always returns 0. */
+static int
+tap_link_update(struct rte_eth_dev *dev __rte_unused,
+		int wait_to_complete __rte_unused)
+{
+	return 0;
+}
+
+/*
+ * Bind RX and TX queue qid of the port to one tap descriptor.
+ *
+ * A descriptor already held by the RX queue, or failing that by the TX
+ * queue, is reused; otherwise a new one is obtained from tun_alloc(). Both
+ * queues are then published in the device data and share the descriptor.
+ *
+ * Returns the descriptor, or -1 when tun_alloc() fails.
+ */
+static int
+tap_setup_queue(struct rte_eth_dev *dev,
+		struct pmd_internals *internals,
+		uint16_t qid)
+{
+	struct rx_queue *rx = &internals->rxq[qid];
+	struct tx_queue *tx = &internals->txq[qid];
+	int fd = rx->fd;
+
+	/* RX side has none: fall back to the TX side */
+	if (fd < 0)
+		fd = tx->fd;
+
+	/* Neither side has one: open a new queue on the tap interface */
+	if (fd < 0) {
+		RTE_LOG(INFO, PMD, "Add queue to TAP %s for qid %d\n",
+			dev->data->name, qid);
+		fd = tun_alloc(dev->data->name);
+		if (fd < 0) {
+			RTE_LOG(ERR, PMD, "tun_alloc(%s) failed\n", dev->data->name);
+			return -1;
+		}
+	}
+
+	dev->data->rx_queues[qid] = rx;
+	dev->data->tx_queues[qid] = tx;
+
+	rx->fd = fd;
+	tx->fd = fd;
+
+	return fd;
+}
+
+/*
+ * RX queue setup callback.
+ *
+ * Records the mempool and port id in the queue, checks that one mbuf of the
+ * pool can hold ETH_FRAME_LEN bytes after the headroom, and attaches a tap
+ * descriptor to the queue.
+ *
+ * Returns 0 on success, -ENOMEM when the mbufs are too small and -1 for an
+ * out-of-range queue id, a NULL mempool or a descriptor setup failure.
+ */
+static int
+tap_rx_queue_setup(struct rte_eth_dev *dev,
+		   uint16_t rx_queue_id,
+		   uint16_t nb_rx_desc __rte_unused,
+		   unsigned int socket_id __rte_unused,
+		   const struct rte_eth_rxconf *rx_conf __rte_unused,
+		   struct rte_mempool *mp)
+{
+	struct pmd_internals *pmd = dev->data->dev_private;
+	struct rx_queue *rxq;
+	uint16_t room;
+	int fd;
+
+	if ((mp == NULL) || (rx_queue_id >= pmd->nb_queues)) {
+		RTE_LOG(ERR, PMD, "nb_queues %d mp %p\n", pmd->nb_queues, mp);
+		return -1;
+	}
+
+	rxq = &pmd->rxq[rx_queue_id];
+	rxq->mp = mp;
+	rxq->in_port = dev->data->port_id;
+
+	/* Space left for frame data once the headroom is taken out */
+	room = (uint16_t) (rte_pktmbuf_data_room_size(mp) -
+			   RTE_PKTMBUF_HEADROOM);
+	if (room < ETH_FRAME_LEN) {
+		RTE_LOG(ERR, PMD,
+			"%s: %d bytes will not fit in mbuf (%d bytes)\n",
+			dev->data->name, ETH_FRAME_LEN, room);
+		return -ENOMEM;
+	}
+
+	fd = tap_setup_queue(dev, pmd, rx_queue_id);
+	if (fd == -1)
+		return -1;
+
+	pmd->fds[rx_queue_id] = fd;
+	RTE_LOG(INFO, PMD, "RX TAP device name %s, qid %d on fd %d\n",
+		dev->data->name, rx_queue_id, rxq->fd);
+
+	return 0;
+}
+
+/*
+ * TX queue setup callback: attach a tap descriptor to TX queue tx_queue_id.
+ *
+ * Returns 0 on success, -1 for an out-of-range queue id or when no
+ * descriptor could be obtained.
+ */
+static int
+tap_tx_queue_setup(struct rte_eth_dev *dev,
+		   uint16_t tx_queue_id,
+		   uint16_t nb_tx_desc __rte_unused,
+		   unsigned int socket_id __rte_unused,
+		   const struct rte_eth_txconf *tx_conf __rte_unused)
+{
+	struct pmd_internals *internals = dev->data->dev_private;
+	int fd;
+
+	if (tx_queue_id >= internals->nb_queues)
+		return -1;
+
+	/*
+	 * tap_setup_queue() returns the descriptor; callers of this callback
+	 * expect 0 on success, so do not pass that value through.
+	 */
+	fd = tap_setup_queue(dev, internals, tx_queue_id);
+	if (fd < 0)
+		return -1;
+
+	/* Record the descriptor like the RX path does, so it gets closed */
+	internals->fds[tx_queue_id] = fd;
+
+	RTE_LOG(INFO, PMD, "TX TAP device name %s, qid %d on fd %d\n",
+		dev->data->name, tx_queue_id, internals->txq[tx_queue_id].fd);
+
+	return 0;
+}
+
+/* ethdev callback table installed on every port created by this driver */
+static const struct eth_dev_ops ops = {
+	.dev_start              = tap_dev_start,
+	.dev_stop               = tap_dev_stop,
+	.dev_close              = tap_dev_close,
+	.dev_configure          = tap_dev_configure,
+	.dev_infos_get          = tap_dev_info,
+	.rx_queue_setup         = tap_rx_queue_setup,
+	.tx_queue_setup         = tap_tx_queue_setup,
+	.rx_queue_release       = tap_rx_queue_release,
+	.tx_queue_release       = tap_tx_queue_release,
+	.link_update            = tap_link_update,
+	.stats_get              = tap_stats_get,
+	.stats_reset            = tap_stats_reset,
+};
+
+/*
+ * Values for the flag argument of rte_eth_dev_create(): keep the
+ * rte_eth_dev_data the port already has, or replace it with a private copy.
+ */
+#define RTE_USE_GLOBAL_DATA	0x0000
+#define RTE_USE_PRIVATE_DATA	0x0001
+
+/*
+ * Assign the MAC addresses of a tap port.
+ *
+ * The host side of the interface behind fd is given the address
+ * "Tap-" + port id + numa node; the address returned in addr for the
+ * application side is "dnet" + port id + numa node.
+ *
+ * Returns 0 on success, -1 on an invalid argument or a failed ioctl.
+ */
+static int
+pmd_mac_address(int fd, struct rte_eth_dev *dev, struct ether_addr *addr)
+{
+	struct ifreq ifr;
+
+	/* Only negative descriptors are invalid, 0 is a legal value */
+	if ((fd < 0) || (dev == NULL) || (addr == NULL))
+		return -1;
+
+	memset(&ifr, 0, sizeof(ifr));
+
+	/* Read the current address first so the request is fully populated */
+	if (ioctl(fd, SIOCGIFHWADDR, &ifr) == -1) {
+		RTE_LOG(ERR, PMD, "ioctl failed (SIOCGIFHWADDR) (%s)\n",
+			ifr.ifr_name);
+		return -1;
+	}
+
+	/* Set the host based MAC address to this special MAC format */
+	ifr.ifr_hwaddr.sa_data[0] = 'T';
+	ifr.ifr_hwaddr.sa_data[1] = 'a';
+	ifr.ifr_hwaddr.sa_data[2] = 'p';
+	ifr.ifr_hwaddr.sa_data[3] = '-';
+	ifr.ifr_hwaddr.sa_data[4] = dev->data->port_id;
+	ifr.ifr_hwaddr.sa_data[5] = dev->data->numa_node;
+	if (ioctl(fd, SIOCSIFHWADDR, &ifr) == -1) {
+		RTE_LOG(ERR, PMD, "%s: ioctl failed (SIOCSIFHWADDR) (%s)\n",
+			dev->data->name, ifr.ifr_name);
+		return -1;
+	}
+
+	/*
+	 * Set the local application MAC address, needs to be different than
+	 * the host based MAC address.
+	 */
+	ifr.ifr_hwaddr.sa_data[0] = 'd';
+	ifr.ifr_hwaddr.sa_data[1] = 'n';
+	ifr.ifr_hwaddr.sa_data[2] = 'e';
+	ifr.ifr_hwaddr.sa_data[3] = 't';
+	ifr.ifr_hwaddr.sa_data[4] = dev->data->port_id;
+	ifr.ifr_hwaddr.sa_data[5] = dev->data->numa_node;
+	memcpy(addr, ifr.ifr_hwaddr.sa_data, ETH_ALEN);
+
+	return 0;
+}
+
+/*
+ * Allocate an ethdev port together with its driver private area.
+ *
+ * name          - name of the port to allocate
+ * dev_type      - device type handed to rte_eth_dev_allocate()
+ * eth_dev       - receives the allocated port on success
+ * dev_ops       - callback table installed on the port
+ * internals     - receives the zeroed private area on success
+ * internal_size - size in bytes of the private area, must not be 0
+ * flag          - RTE_USE_PRIVATE_DATA to give the port its own copy of
+ *                 rte_eth_dev_data, RTE_USE_GLOBAL_DATA to keep the
+ *                 one it was allocated with
+ *
+ * Returns 0 on success. On failure -1 is returned, everything allocated
+ * here is released and the output arguments are left untouched.
+ */
+static int
+rte_eth_dev_create(const char *name, int dev_type,
+		   struct rte_eth_dev **eth_dev,
+		   const struct eth_dev_ops *dev_ops,
+		   void **internals, size_t internal_size,
+		   uint16_t flag)
+{
+	char buff[RTE_ETH_NAME_MAX_LEN];
+	int numa_node = rte_socket_id();
+	struct rte_eth_dev *dev = NULL;
+	struct rte_eth_dev_data *data = NULL;
+	struct rte_eth_dev_data *orig_data = NULL;
+	void *priv = NULL;
+
+	if ((name == NULL) || (eth_dev == NULL) || (dev_ops == NULL) ||
+	    (internals == NULL) || (internal_size == 0)) {
+		RTE_PMD_DEBUG_TRACE("Parameters are invalid\n");
+		return -1;
+	}
+
+	dev = rte_eth_dev_allocate(name, dev_type);
+	if (dev == NULL) {
+		/* Nothing to undo yet (buff is not filled in at this point) */
+		RTE_PMD_DEBUG_TRACE("%s: rte_eth_dev_allocate failed\n", name);
+		return -1;
+	}
+
+	/* Remember the data the port came with so it can be restored */
+	orig_data = dev->data;
+
+	if (flag & RTE_USE_PRIVATE_DATA) {
+		/*
+		 * now do all data allocation - for eth_dev structure, dummy
+		 * pci driver and internal (private) data
+		 */
+		snprintf(buff, sizeof(buff), "D-%s-%d", name, numa_node);
+		data = rte_zmalloc_socket(buff, sizeof(struct rte_eth_dev_data),
+					  0, numa_node);
+		if (data == NULL) {
+			RTE_PMD_DEBUG_TRACE("%s: Unable to allocate memory\n",
+					    name);
+			goto error;
+		}
+		/* move the current state of the structure to the new one */
+		rte_memcpy(data, dev->data, sizeof(struct rte_eth_dev_data));
+		dev->data = data;	/* Override the current data pointer */
+	} else
+		data = dev->data;
+
+	snprintf(buff, sizeof(buff), "I-%s-%d", name, numa_node);
+	priv = rte_zmalloc_socket(buff, internal_size, 0, numa_node);
+	if (priv == NULL) {
+		RTE_PMD_DEBUG_TRACE("Unable to allocate internal memory %zu\n",
+				    internal_size);
+		goto error;
+	}
+
+	/*
+	 * Setup some default values. data is dev->data in both cases above,
+	 * so the port id and name are already in place.
+	 */
+	dev->dev_ops = dev_ops;
+	data->dev_private = priv;
+
+	dev->driver = NULL;
+	data->dev_flags = RTE_ETH_DEV_DETACHABLE;
+	data->kdrv = RTE_KDRV_NONE;
+	data->numa_node = numa_node;
+
+	*eth_dev = dev;
+	*internals = priv;
+
+	return 0;
+error:
+	rte_free(priv);
+
+	if (flag & RTE_USE_PRIVATE_DATA) {
+		/* Never leave the port pointing at the freed private copy */
+		dev->data = orig_data;
+		rte_free(data);
+	}
+
+	rte_eth_dev_release_port(dev);
+
+	return -1;
+}
+
+/*
+ * Create the ethdev port for a tap device and open its first queue.
+ *
+ * name      - device name, used for logging only
+ * tap       - interface name and link speed to use
+ * internals - receives the driver private data on success
+ * eth_dev   - receives the port on success
+ *
+ * Returns 0 on success. On failure -1 is returned and the port, its private
+ * data and the tap descriptor are all released.
+ */
+static int
+pmd_init_internals(const char *name, struct tap_info *tap,
+		   struct pmd_internals **internals,
+		   struct rte_eth_dev **eth_dev)
+{
+	struct rte_eth_dev *dev = NULL;
+	struct pmd_internals *internal = NULL;
+	struct rte_eth_dev_data *data = NULL;
+	int ret, i, fd = -1;
+
+	RTE_LOG(INFO, PMD,
+		"%s: Create TUN/TAP Ethernet device with %d queues on numa %u\n",
+		name, RTE_PMD_TAP_MAX_QUEUES, rte_socket_id());
+
+	pmd_link.link_speed = tap->speed;
+
+	ret = rte_eth_dev_create(tap->name, RTE_ETH_DEV_VIRTUAL, &dev, &ops,
+				 (void **)&internal, sizeof(struct pmd_internals),
+				 RTE_USE_PRIVATE_DATA);
+	if (ret < 0)
+		return -1;
+
+	/* snprintf() guarantees termination of the copied name */
+	snprintf(internal->name, sizeof(internal->name), "%s", tap->name);
+
+	internal->nb_queues = RTE_PMD_TAP_MAX_QUEUES;
+
+	/* Presetup the fds to -1 as being not working */
+	for (i = 0; i < RTE_PMD_TAP_MAX_QUEUES; i++) {
+		internal->fds[i] = -1;
+		internal->rxq[i].fd = -1;
+		internal->txq[i].fd = -1;
+	}
+
+	/* Create the first Tap device */
+	fd = tun_alloc(dev->data->name);
+	if (fd < 0) {
+		RTE_LOG(ERR, PMD, "tun_alloc(%s) failed\n", dev->data->name);
+		goto error;
+	}
+
+	/* Take the TUN/TAP fd and place in the first location */
+	internal->rxq[0].fd = fd;
+	internal->txq[0].fd = fd;
+	internal->fds[0] = fd;
+
+	if (pmd_mac_address(fd, dev, &internal->eth_addr) < 0)
+		goto error;
+
+	data = dev->data;
+
+	data->dev_link = pmd_link;
+	data->mac_addrs = &internal->eth_addr;
+
+	data->nb_rx_queues = (uint16_t)internal->nb_queues;
+	data->nb_tx_queues = (uint16_t)internal->nb_queues;
+	data->drv_name = drivername;
+
+	*eth_dev = dev;
+	*internals = internal;
+
+	return 0;
+
+error:
+	if (fd >= 0)
+		close(fd);
+	rte_free(internal);
+	/*
+	 * RTE_USE_PRIVATE_DATA made dev->data a private allocation: free it
+	 * as well, the same way pmd_tap_devuninit() does.
+	 */
+	rte_free(dev->data);
+	rte_eth_dev_release_port(dev);
+	return -1;
+}
+
+/*
+ * Create a tap port from the given settings and install the RX/TX burst
+ * callbacks on it. Returns 0 on success, -1 when the port cannot be set up.
+ */
+static int
+eth_dev_tap_create(const char *name, struct tap_info *tap)
+{
+	struct pmd_internals *internals = NULL;
+	struct rte_eth_dev *eth_dev = NULL;
+
+	if (pmd_init_internals(name, tap, &internals, &eth_dev) < 0)
+		return -1;
+
+	eth_dev->rx_pkt_burst = pmd_rx_burst;
+	eth_dev->tx_pkt_burst = pmd_tx_burst;
+
+	return 0;
+}
+
+/*
+ * kvargs handler for the "iface" key: store the requested interface name in
+ * the tap_info passed through extra_args. Without a value the default
+ * "dtap<unit>" name of the most recently assigned unit is used.
+ * Always returns 0.
+ */
+static int
+set_interface_name(const char *key __rte_unused,
+		   const char *value,
+		   void *extra_args)
+{
+	struct tap_info *info = extra_args;
+
+	if (value == NULL) {
+		/* tap_unit was already advanced for this device */
+		snprintf(info->name, sizeof(info->name), "dtap%d", (tap_unit - 1));
+		return 0;
+	}
+
+	snprintf(info->name, sizeof(info->name), "%s", value);
+	return 0;
+}
+
+/*
+ * kvargs handler for the "speed" key: store the link speed in the link
+ * template and in the tap_info passed through extra_args.
+ *
+ * A NULL value selects the default of ETH_SPEED_NUM_10G. Returns 0 on
+ * success, -1 when value is not a non-negative decimal number that fits
+ * in an int.
+ */
+static int
+set_interface_speed(const char *key __rte_unused,
+		    const char *value,
+		    void *extra_args)
+{
+	struct tap_info *tap = (struct tap_info *)extra_args;
+	long speed = ETH_SPEED_NUM_10G;
+
+	if (value) {
+		char *end = NULL;
+
+		/* strtol() lets us reject garbage that atoi() would accept */
+		speed = strtol(value, &end, 10);
+		if ((end == value) || (*end != '\0') ||
+		    (speed < 0) || (speed != (int)speed)) {
+			RTE_LOG(ERR, PMD, "Invalid speed value (%s)\n", value);
+			return -1;
+		}
+	}
+
+	pmd_link.link_speed = speed;
+	tap->speed = pmd_link.link_speed;
+
+	return 0;
+}
+
+/*
+ * Open a TAP interface device.
+ *
+ * name is the virtual device name, params the optional "iface=...,speed=..."
+ * argument string. Without parameters, or when they cannot be parsed, the
+ * device is created with the default name "dtap<unit>" and a 10G link.
+ *
+ * Returns the result of eth_dev_tap_create(), or the negative value
+ * reported while processing the arguments.
+ */
+static int
+pmd_tap_devinit(const char *name, const char *params)
+{
+	int ret = 0;
+	struct rte_kvargs *kvlist = NULL;
+	struct tap_info tap_info;
+
+	/* Setup default values */
+	memset(&tap_info, 0, sizeof(tap_info));
+
+	tap_info.speed = ETH_SPEED_NUM_10G;
+	snprintf(tap_info.name, sizeof(tap_info.name), "dtap%d", tap_unit++);
+
+	if ((params == NULL) || (params[0] == '\0')) {
+		RTE_LOG(INFO, PMD, "Initializing pmd_tap for %s\n", name);
+		goto create;
+	}
+
+	RTE_LOG(INFO, PMD, "Initialize %s with params (%s)\n", name, params);
+
+	kvlist = rte_kvargs_parse(params, valid_arguments);
+	if (!kvlist)
+		goto create;	/* unusable arguments: fall back to defaults */
+
+	if (rte_kvargs_count(kvlist, ETH_TAP_SPEED_ARG) == 1) {
+		ret = rte_kvargs_process(kvlist, ETH_TAP_SPEED_ARG,
+					 &set_interface_speed, &tap_info);
+		if (ret < 0)
+			goto leave;
+	} else
+		set_interface_speed(NULL, NULL, &tap_info);
+
+	if (rte_kvargs_count(kvlist, ETH_TAP_IFACE_ARG) == 1) {
+		ret = rte_kvargs_process(kvlist, ETH_TAP_IFACE_ARG,
+					 &set_interface_name, &tap_info);
+		if (ret < 0)
+			goto leave;
+	} else
+		set_interface_name(NULL, NULL, (void *)&tap_info);
+
+create:
+	/* The device is created on every path that has valid settings */
+	ret = eth_dev_tap_create(name, &tap_info);
+
+leave:
+	/* Single exit point so the parsed arguments are always released */
+	if (kvlist != NULL)
+		rte_kvargs_free(kvlist);
+
+	if (ret < 0)
+		RTE_LOG(INFO, PMD, "Failed to create pmd_tap for %s\n", name);
+
+	return ret;
+}
+
+/*
+ * Detach a TAP device.
+ *
+ * Closes every descriptor recorded for the port called name, frees its
+ * private data and device data and releases the port. A NULL name or an
+ * unknown port is not an error. Always returns 0.
+ */
+static int
+pmd_tap_devuninit(const char *name)
+{
+	struct rte_eth_dev *port;
+	struct pmd_internals *pmd;
+	int qid;
+
+	RTE_LOG(INFO, PMD, "Closing TUN/TAP Ethernet device on numa %u\n",
+		rte_socket_id());
+
+	/* Nothing to detach without a name or a matching ethdev entry */
+	port = (name != NULL) ? rte_eth_dev_allocated(name) : NULL;
+	if (port == NULL)
+		return 0;
+
+	pmd = port->data->dev_private;
+	for (qid = 0; qid < pmd->nb_queues; qid++) {
+		if (pmd->fds[qid] == -1)
+			continue;
+		close(pmd->fds[qid]);
+	}
+
+	/* Private area first, then the device data that pointed to it */
+	rte_free(pmd);
+	rte_free(port->data);
+
+	rte_eth_dev_release_port(port);
+
+	return 0;
+}
+
+/* Virtual device driver descriptor: init/uninit entry points of the PMD */
+static struct rte_driver pmd_tap_drv = {
+	.type = PMD_VDEV,
+	.init = pmd_tap_devinit,
+	.uninit = pmd_tap_devuninit,
+};
+
+/* Register the driver as "eth_tap" along with its accepted parameters */
+PMD_REGISTER_DRIVER(pmd_tap_drv, eth_tap);
+DRIVER_REGISTER_PARAM_STRING(eth_tap,
+			     "iface=<string>,speed=N");
diff --git a/drivers/net/tap/rte_pmd_tap_version.map b/drivers/net/tap/rte_pmd_tap_version.map
new file mode 100644
index 0000000..61463bf
--- /dev/null
+++ b/drivers/net/tap/rte_pmd_tap_version.map
@@ -0,0 +1,4 @@
+DPDK_16.11 {
+
+	local: *;
+};
diff --git a/mk/rte.app.mk b/mk/rte.app.mk
index 1a0095b..bd1d10f 100644
--- a/mk/rte.app.mk
+++ b/mk/rte.app.mk
@@ -129,6 +129,7 @@ ifeq ($(CONFIG_RTE_LIBRTE_VHOST),y)
 _LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_VHOST)      += -lrte_pmd_vhost
 endif # $(CONFIG_RTE_LIBRTE_VHOST)
 _LDLIBS-$(CONFIG_RTE_LIBRTE_VMXNET3_PMD)    += -lrte_pmd_vmxnet3_uio
+_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_TAP)        += -lrte_pmd_tap
 
 ifeq ($(CONFIG_RTE_LIBRTE_CRYPTODEV),y)
 _LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_AESNI_MB)   += -lrte_pmd_aesni_mb
-- 
2.8.0.GIT

^ permalink raw reply related	[flat|nested] 59+ messages in thread

* Re: [PATCH v2] drivers/net:new PMD using tun/tap host interface
  2016-09-16 16:22 ` [PATCH v2] drivers/net:new PMD using tun/tap host interface Keith Wiles
@ 2016-09-18 13:25   ` Yuanhan Liu
  2016-09-18 16:20     ` Wiles, Keith
  2016-09-20  4:05   ` Yuanhan Liu
  1 sibling, 1 reply; 59+ messages in thread
From: Yuanhan Liu @ 2016-09-18 13:25 UTC (permalink / raw)
  To: Keith Wiles; +Cc: dev, pmatilai

On Fri, Sep 16, 2016 at 11:22:13AM -0500, Keith Wiles wrote:
> The rte_eth_tap.c PMD creates a device using TUN/TAP interfaces
> on the local host. The PMD allows for DPDK and the host to
> communicate using a raw device interface on the host and in
> the DPDK application. The device created is a Tap device with
> a L2 packet header.
> 
> v2 - merge all of the patches into one patch.
>      Fix a typo on naming the tap device.
>      Update the maintainers list
> 
> Signed-off-by: Keith Wiles <keith.wiles@intel.com>

Hi,

FYI, my robot caught some build errors with this patch applied.

	--yliu

---
In file included from /yeti/vm/ubuntu-initrd-16.04-x86_64-build/dpdk/drivers/net/tap/rte_eth_tap.c:50:
/usr/include/linux/if.h:71:2: error: redefinition of enumerator 'IFF_UP'
        IFF_UP                          = 1<<0,  /* sysfs */
        ^
/usr/include/net/if.h:45:17: note: expanded from macro 'IFF_UP'
# define IFF_UP IFF_UP
                ^
/usr/include/net/if.h:44:5: note: previous definition is here
    IFF_UP = 0x1,               /* Interface is up.  */
    ^
In file included from /yeti/vm/ubuntu-initrd-16.04-x86_64-build/dpdk/drivers/net/tap/rte_eth_tap.c:50:
/usr/include/linux/if.h:72:2: error: redefinition of enumerator 'IFF_BROADCAST'
        IFF_BROADCAST                   = 1<<1,  /* __volatile__ */
        ^
/usr/include/net/if.h:47:24: note: expanded from macro 'IFF_BROADCAST'
# define IFF_BROADCAST  IFF_BROADCAST
                        ^
/usr/include/net/if.h:46:5: note: previous definition is here
    IFF_BROADCAST = 0x2,        /* Broadcast address valid.  */
    ^
In file included from /yeti/vm/ubuntu-initrd-16.04-x86_64-build/dpdk/drivers/net/tap/rte_eth_tap.c:50:
/usr/include/linux/if.h:73:2: error: redefinition of enumerator 'IFF_DEBUG'
        IFF_DEBUG                       = 1<<2,  /* sysfs */
        ^
/usr/include/net/if.h:49:20: note: expanded from macro 'IFF_DEBUG'
# define IFF_DEBUG      IFF_DEBUG
                        ^
/usr/include/net/if.h:48:5: note: previous definition is here
    IFF_DEBUG = 0x4,            /* Turn on debugging.  */
    ^
In file included from /yeti/vm/ubuntu-initrd-16.04-x86_64-build/dpdk/drivers/net/tap/rte_eth_tap.c:50:
/usr/include/linux/if.h:74:2: error: redefinition of enumerator 'IFF_LOOPBACK'
        IFF_LOOPBACK                    = 1<<3,  /* __volatile__ */
        ^
/usr/include/net/if.h:51:23: note: expanded from macro 'IFF_LOOPBACK'
# define IFF_LOOPBACK   IFF_LOOPBACK
                        ^
/usr/include/net/if.h:50:5: note: previous definition is here
    IFF_LOOPBACK = 0x8,         /* Is a loopback net.  */
    ^
In file included from /yeti/vm/ubuntu-initrd-16.04-x86_64-build/dpdk/drivers/net/tap/rte_eth_tap.c:50:
/usr/include/linux/if.h:75:2: error: redefinition of enumerator 'IFF_POINTOPOINT'
        IFF_POINTOPOINT                 = 1<<4,  /* __volatile__ */
        ^
/usr/include/net/if.h:53:26: note: expanded from macro 'IFF_POINTOPOINT'
# define IFF_POINTOPOINT IFF_POINTOPOINT
                         ^
/usr/include/net/if.h:52:5: note: previous definition is here
    IFF_POINTOPOINT = 0x10,     /* Interface is point-to-point link.  */
    ^
In file included from /yeti/vm/ubuntu-initrd-16.04-x86_64-build/dpdk/drivers/net/tap/rte_eth_tap.c:50:
/usr/include/linux/if.h:76:2: error: redefinition of enumerator 'IFF_NOTRAILERS'
        IFF_NOTRAILERS                  = 1<<5,  /* sysfs */
        ^
/usr/include/net/if.h:55:25: note: expanded from macro 'IFF_NOTRAILERS'
# define IFF_NOTRAILERS IFF_NOTRAILERS
                        ^
/usr/include/net/if.h:54:5: note: previous definition is here
    IFF_NOTRAILERS = 0x20,      /* Avoid use of trailers.  */
    ^
In file included from /yeti/vm/ubuntu-initrd-16.04-x86_64-build/dpdk/drivers/net/tap/rte_eth_tap.c:50:
/usr/include/linux/if.h:77:2: error: redefinition of enumerator 'IFF_RUNNING'
        IFF_RUNNING                     = 1<<6,  /* __volatile__ */
        ^
/usr/include/net/if.h:57:22: note: expanded from macro 'IFF_RUNNING'
# define IFF_RUNNING    IFF_RUNNING
                        ^
/usr/include/net/if.h:56:5: note: previous definition is here
    IFF_RUNNING = 0x40,         /* Resources allocated.  */
    ^
In file included from /yeti/vm/ubuntu-initrd-16.04-x86_64-build/dpdk/drivers/net/tap/rte_eth_tap.c:50:
/usr/include/linux/if.h:78:2: error: redefinition of enumerator 'IFF_NOARP'
        IFF_NOARP                       = 1<<7,  /* sysfs */
        ^
/usr/include/net/if.h:59:20: note: expanded from macro 'IFF_NOARP'
# define IFF_NOARP      IFF_NOARP
                        ^
/usr/include/net/if.h:58:5: note: previous definition is here
    IFF_NOARP = 0x80,           /* No address resolution protocol.  */
    ^
In file included from /yeti/vm/ubuntu-initrd-16.04-x86_64-build/dpdk/drivers/net/tap/rte_eth_tap.c:50:
/usr/include/linux/if.h:79:2: error: redefinition of enumerator 'IFF_PROMISC'
        IFF_PROMISC                     = 1<<8,  /* sysfs */
        ^
/usr/include/net/if.h:61:22: note: expanded from macro 'IFF_PROMISC'
# define IFF_PROMISC    IFF_PROMISC
                        ^
/usr/include/net/if.h:60:5: note: previous definition is here
    IFF_PROMISC = 0x100,        /* Receive all packets.  */
    ^
In file included from /yeti/vm/ubuntu-initrd-16.04-x86_64-build/dpdk/drivers/net/tap/rte_eth_tap.c:50:
/usr/include/linux/if.h:80:2: error: redefinition of enumerator 'IFF_ALLMULTI'
        IFF_ALLMULTI                    = 1<<9,  /* sysfs */
        ^
/usr/include/net/if.h:65:23: note: expanded from macro 'IFF_ALLMULTI'
# define IFF_ALLMULTI   IFF_ALLMULTI
                        ^
/usr/include/net/if.h:64:5: note: previous definition is here
    IFF_ALLMULTI = 0x200,       /* Receive all multicast packets.  */
    ^
In file included from /yeti/vm/ubuntu-initrd-16.04-x86_64-build/dpdk/drivers/net/tap/rte_eth_tap.c:50:
/usr/include/linux/if.h:81:2: error: redefinition of enumerator 'IFF_MASTER'
        IFF_MASTER                      = 1<<10, /* __volatile__ */
        ^
/usr/include/net/if.h:68:21: note: expanded from macro 'IFF_MASTER'
# define IFF_MASTER     IFF_MASTER
                        ^
/usr/include/net/if.h:67:5: note: previous definition is here
    IFF_MASTER = 0x400,         /* Master of a load balancer.  */
    ^
In file included from /yeti/vm/ubuntu-initrd-16.04-x86_64-build/dpdk/drivers/net/tap/rte_eth_tap.c:50:
/usr/include/linux/if.h:82:2: error: redefinition of enumerator 'IFF_SLAVE'
        IFF_SLAVE                       = 1<<11, /* __volatile__ */
        ^
/usr/include/net/if.h:70:20: note: expanded from macro 'IFF_SLAVE'
# define IFF_SLAVE      IFF_SLAVE
                        ^
/usr/include/net/if.h:69:5: note: previous definition is here
    IFF_SLAVE = 0x800,          /* Slave of a load balancer.  */
    ^
In file included from /yeti/vm/ubuntu-initrd-16.04-x86_64-build/dpdk/drivers/net/tap/rte_eth_tap.c:50:
/usr/include/linux/if.h:83:2: error: redefinition of enumerator 'IFF_MULTICAST'
        IFF_MULTICAST                   = 1<<12, /* sysfs */
        ^
/usr/include/net/if.h:73:24: note: expanded from macro 'IFF_MULTICAST'
# define IFF_MULTICAST  IFF_MULTICAST
                        ^
/usr/include/net/if.h:72:5: note: previous definition is here
    IFF_MULTICAST = 0x1000,     /* Supports multicast.  */
    ^
In file included from /yeti/vm/ubuntu-initrd-16.04-x86_64-build/dpdk/drivers/net/tap/rte_eth_tap.c:50:
/usr/include/linux/if.h:84:2: error: redefinition of enumerator 'IFF_PORTSEL'
        IFF_PORTSEL                     = 1<<13, /* sysfs */
        ^
/usr/include/net/if.h:76:22: note: expanded from macro 'IFF_PORTSEL'
# define IFF_PORTSEL    IFF_PORTSEL
                        ^
/usr/include/net/if.h:75:5: note: previous definition is here
    IFF_PORTSEL = 0x2000,       /* Can set media type.  */
    ^
In file included from /yeti/vm/ubuntu-initrd-16.04-x86_64-build/dpdk/drivers/net/tap/rte_eth_tap.c:50:
/usr/include/linux/if.h:85:2: error: redefinition of enumerator 'IFF_AUTOMEDIA'
        IFF_AUTOMEDIA                   = 1<<14, /* sysfs */
        ^
/usr/include/net/if.h:78:24: note: expanded from macro 'IFF_AUTOMEDIA'
# define IFF_AUTOMEDIA  IFF_AUTOMEDIA
                        ^
/usr/include/net/if.h:77:5: note: previous definition is here
    IFF_AUTOMEDIA = 0x4000,     /* Auto media select active.  */
    ^
In file included from /yeti/vm/ubuntu-initrd-16.04-x86_64-build/dpdk/drivers/net/tap/rte_eth_tap.c:50:
/usr/include/linux/if.h:86:2: error: redefinition of enumerator 'IFF_DYNAMIC'
        IFF_DYNAMIC                     = 1<<15, /* sysfs */
        ^
/usr/include/net/if.h:80:22: note: expanded from macro 'IFF_DYNAMIC'
# define IFF_DYNAMIC    IFF_DYNAMIC
                        ^
/usr/include/net/if.h:79:5: note: previous definition is here
    IFF_DYNAMIC = 0x8000        /* Dialup device with changing addresses.  */
    ^
In file included from /yeti/vm/ubuntu-initrd-16.04-x86_64-build/dpdk/drivers/net/tap/rte_eth_tap.c:50:
/usr/include/linux/if.h:169:8: error: redefinition of 'ifmap'
struct ifmap {
       ^
/usr/include/net/if.h:111:8: note: previous definition is here
struct ifmap
       ^
In file included from /yeti/vm/ubuntu-initrd-16.04-x86_64-build/dpdk/drivers/net/tap/rte_eth_tap.c:50:
/usr/include/linux/if.h:203:8: error: redefinition of 'ifreq'
struct ifreq {
       ^
/usr/include/net/if.h:126:8: note: previous definition is here
struct ifreq
       ^
In file included from /yeti/vm/ubuntu-initrd-16.04-x86_64-build/dpdk/drivers/net/tap/rte_eth_tap.c:50:
/usr/include/linux/if.h:252:8: error: redefinition of 'ifconf'
struct ifconf  {
       ^
/usr/include/net/if.h:176:8: note: previous definition is here
struct ifconf
       ^
19 errors generated.
make[6]: *** [rte_eth_tap.o] Error 1
make[5]: *** [tap] Error 2
make[5]: *** Waiting for unfinished jobs....
make[4]: *** [net] Error 2
make[3]: *** [drivers] Error 2
make[2]: *** [all] Error 2
make[1]: *** [pre_install] Error 2
make: *** [install] Error 2
error: build failed

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [PATCH v2] drivers/net:new PMD using tun/tap host interface
  2016-09-18 13:25   ` Yuanhan Liu
@ 2016-09-18 16:20     ` Wiles, Keith
  2016-09-19  0:29       ` Yuanhan Liu
  0 siblings, 1 reply; 59+ messages in thread
From: Wiles, Keith @ 2016-09-18 16:20 UTC (permalink / raw)
  To: Yuanhan Liu; +Cc: dev, pmatilai


Regards,
Keith

> On Sep 18, 2016, at 8:25 AM, Yuanhan Liu <yuanhan.liu@linux.intel.com> wrote:
> 
> On Fri, Sep 16, 2016 at 11:22:13AM -0500, Keith Wiles wrote:
>> The rte_eth_tap.c PMD creates a device using TUN/TAP interfaces
>> on the local host. The PMD allows for DPDK and the host to
>> communicate using a raw device interface on the host and in
>> the DPDK application. The device created is a Tap device with
>> a L2 packet header.
>> 
>> v2 - merge all of the patches into one patch.
>>     Fix a typo on naming the tap device.
>>     Update the maintainers list
>> 
>> Signed-off-by: Keith Wiles <keith.wiles@intel.com>
> 
> Hi,
> 
> FYI, my robot caught some build errors with this patch applied.

All of the below errors are from Linux header files and not the Tap driver. How are you building DPDK, which config file are you using, OS version, .... Some details would be nice to understand how these errors are being generated and how I can recreate them.

> 
> 	--yliu
> 
> ---
> In file included from /yeti/vm/ubuntu-initrd-16.04-x86_64-build/dpdk/drivers/net/tap/rte_eth_tap.c:50:
> /usr/include/linux/if.h:71:2: error: redefinition of enumerator 'IFF_UP'
>        IFF_UP                          = 1<<0,  /* sysfs */
>        ^
> /usr/include/net/if.h:45:17: note: expanded from macro 'IFF_UP'
> # define IFF_UP IFF_UP
>                ^
> /usr/include/net/if.h:44:5: note: previous definition is here
>    IFF_UP = 0x1,               /* Interface is up.  */
>    ^
> In file included from /yeti/vm/ubuntu-initrd-16.04-x86_64-build/dpdk/drivers/net/tap/rte_eth_tap.c:50:
> /usr/include/linux/if.h:72:2: error: redefinition of enumerator 'IFF_BROADCAST'
>        IFF_BROADCAST                   = 1<<1,  /* __volatile__ */
>        ^
> /usr/include/net/if.h:47:24: note: expanded from macro 'IFF_BROADCAST'
> # define IFF_BROADCAST  IFF_BROADCAST
>                        ^
> /usr/include/net/if.h:46:5: note: previous definition is here
>    IFF_BROADCAST = 0x2,        /* Broadcast address valid.  */
>    ^
> In file included from /yeti/vm/ubuntu-initrd-16.04-x86_64-build/dpdk/drivers/net/tap/rte_eth_tap.c:50:
> /usr/include/linux/if.h:73:2: error: redefinition of enumerator 'IFF_DEBUG'
>        IFF_DEBUG                       = 1<<2,  /* sysfs */
>        ^
> /usr/include/net/if.h:49:20: note: expanded from macro 'IFF_DEBUG'
> # define IFF_DEBUG      IFF_DEBUG
>                        ^
> /usr/include/net/if.h:48:5: note: previous definition is here
>    IFF_DEBUG = 0x4,            /* Turn on debugging.  */
>    ^
> In file included from /yeti/vm/ubuntu-initrd-16.04-x86_64-build/dpdk/drivers/net/tap/rte_eth_tap.c:50:
> /usr/include/linux/if.h:74:2: error: redefinition of enumerator 'IFF_LOOPBACK'
>        IFF_LOOPBACK                    = 1<<3,  /* __volatile__ */
>        ^
> /usr/include/net/if.h:51:23: note: expanded from macro 'IFF_LOOPBACK'
> # define IFF_LOOPBACK   IFF_LOOPBACK
>                        ^
> /usr/include/net/if.h:50:5: note: previous definition is here
>    IFF_LOOPBACK = 0x8,         /* Is a loopback net.  */
>    ^
> In file included from /yeti/vm/ubuntu-initrd-16.04-x86_64-build/dpdk/drivers/net/tap/rte_eth_tap.c:50:
> /usr/include/linux/if.h:75:2: error: redefinition of enumerator 'IFF_POINTOPOINT'
>        IFF_POINTOPOINT                 = 1<<4,  /* __volatile__ */
>        ^
> /usr/include/net/if.h:53:26: note: expanded from macro 'IFF_POINTOPOINT'
> # define IFF_POINTOPOINT IFF_POINTOPOINT
>                         ^
> /usr/include/net/if.h:52:5: note: previous definition is here
>    IFF_POINTOPOINT = 0x10,     /* Interface is point-to-point link.  */
>    ^
> In file included from /yeti/vm/ubuntu-initrd-16.04-x86_64-build/dpdk/drivers/net/tap/rte_eth_tap.c:50:
> /usr/include/linux/if.h:76:2: error: redefinition of enumerator 'IFF_NOTRAILERS'
>        IFF_NOTRAILERS                  = 1<<5,  /* sysfs */
>        ^
> /usr/include/net/if.h:55:25: note: expanded from macro 'IFF_NOTRAILERS'
> # define IFF_NOTRAILERS IFF_NOTRAILERS
>                        ^
> /usr/include/net/if.h:54:5: note: previous definition is here
>    IFF_NOTRAILERS = 0x20,      /* Avoid use of trailers.  */
>    ^
> In file included from /yeti/vm/ubuntu-initrd-16.04-x86_64-build/dpdk/drivers/net/tap/rte_eth_tap.c:50:
> /usr/include/linux/if.h:77:2: error: redefinition of enumerator 'IFF_RUNNING'
>        IFF_RUNNING                     = 1<<6,  /* __volatile__ */
>        ^
> /usr/include/net/if.h:57:22: note: expanded from macro 'IFF_RUNNING'
> # define IFF_RUNNING    IFF_RUNNING
>                        ^
> /usr/include/net/if.h:56:5: note: previous definition is here
>    IFF_RUNNING = 0x40,         /* Resources allocated.  */
>    ^
> In file included from /yeti/vm/ubuntu-initrd-16.04-x86_64-build/dpdk/drivers/net/tap/rte_eth_tap.c:50:
> /usr/include/linux/if.h:78:2: error: redefinition of enumerator 'IFF_NOARP'
>        IFF_NOARP                       = 1<<7,  /* sysfs */
>        ^
> /usr/include/net/if.h:59:20: note: expanded from macro 'IFF_NOARP'
> # define IFF_NOARP      IFF_NOARP
>                        ^
> /usr/include/net/if.h:58:5: note: previous definition is here
>    IFF_NOARP = 0x80,           /* No address resolution protocol.  */
>    ^
> In file included from /yeti/vm/ubuntu-initrd-16.04-x86_64-build/dpdk/drivers/net/tap/rte_eth_tap.c:50:
> /usr/include/linux/if.h:79:2: error: redefinition of enumerator 'IFF_PROMISC'
>        IFF_PROMISC                     = 1<<8,  /* sysfs */
>        ^
> /usr/include/net/if.h:61:22: note: expanded from macro 'IFF_PROMISC'
> # define IFF_PROMISC    IFF_PROMISC
>                        ^
> /usr/include/net/if.h:60:5: note: previous definition is here
>    IFF_PROMISC = 0x100,        /* Receive all packets.  */
>    ^
> In file included from /yeti/vm/ubuntu-initrd-16.04-x86_64-build/dpdk/drivers/net/tap/rte_eth_tap.c:50:
> /usr/include/linux/if.h:80:2: error: redefinition of enumerator 'IFF_ALLMULTI'
>        IFF_ALLMULTI                    = 1<<9,  /* sysfs */
>        ^
> /usr/include/net/if.h:65:23: note: expanded from macro 'IFF_ALLMULTI'
> # define IFF_ALLMULTI   IFF_ALLMULTI
>                        ^
> /usr/include/net/if.h:64:5: note: previous definition is here
>    IFF_ALLMULTI = 0x200,       /* Receive all multicast packets.  */
>    ^
> In file included from /yeti/vm/ubuntu-initrd-16.04-x86_64-build/dpdk/drivers/net/tap/rte_eth_tap.c:50:
> /usr/include/linux/if.h:81:2: error: redefinition of enumerator 'IFF_MASTER'
>        IFF_MASTER                      = 1<<10, /* __volatile__ */
>        ^
> /usr/include/net/if.h:68:21: note: expanded from macro 'IFF_MASTER'
> # define IFF_MASTER     IFF_MASTER
>                        ^
> /usr/include/net/if.h:67:5: note: previous definition is here
>    IFF_MASTER = 0x400,         /* Master of a load balancer.  */
>    ^
> In file included from /yeti/vm/ubuntu-initrd-16.04-x86_64-build/dpdk/drivers/net/tap/rte_eth_tap.c:50:
> /usr/include/linux/if.h:82:2: error: redefinition of enumerator 'IFF_SLAVE'
>        IFF_SLAVE                       = 1<<11, /* __volatile__ */
>        ^
> /usr/include/net/if.h:70:20: note: expanded from macro 'IFF_SLAVE'
> # define IFF_SLAVE      IFF_SLAVE
>                        ^
> /usr/include/net/if.h:69:5: note: previous definition is here
>    IFF_SLAVE = 0x800,          /* Slave of a load balancer.  */
>    ^
> In file included from /yeti/vm/ubuntu-initrd-16.04-x86_64-build/dpdk/drivers/net/tap/rte_eth_tap.c:50:
> /usr/include/linux/if.h:83:2: error: redefinition of enumerator 'IFF_MULTICAST'
>        IFF_MULTICAST                   = 1<<12, /* sysfs */
>        ^
> /usr/include/net/if.h:73:24: note: expanded from macro 'IFF_MULTICAST'
> # define IFF_MULTICAST  IFF_MULTICAST
>                        ^
> /usr/include/net/if.h:72:5: note: previous definition is here
>    IFF_MULTICAST = 0x1000,     /* Supports multicast.  */
>    ^
> In file included from /yeti/vm/ubuntu-initrd-16.04-x86_64-build/dpdk/drivers/net/tap/rte_eth_tap.c:50:
> /usr/include/linux/if.h:84:2: error: redefinition of enumerator 'IFF_PORTSEL'
>        IFF_PORTSEL                     = 1<<13, /* sysfs */
>        ^
> /usr/include/net/if.h:76:22: note: expanded from macro 'IFF_PORTSEL'
> # define IFF_PORTSEL    IFF_PORTSEL
>                        ^
> /usr/include/net/if.h:75:5: note: previous definition is here
>    IFF_PORTSEL = 0x2000,       /* Can set media type.  */
>    ^
> In file included from /yeti/vm/ubuntu-initrd-16.04-x86_64-build/dpdk/drivers/net/tap/rte_eth_tap.c:50:
> /usr/include/linux/if.h:85:2: error: redefinition of enumerator 'IFF_AUTOMEDIA'
>        IFF_AUTOMEDIA                   = 1<<14, /* sysfs */
>        ^
> /usr/include/net/if.h:78:24: note: expanded from macro 'IFF_AUTOMEDIA'
> # define IFF_AUTOMEDIA  IFF_AUTOMEDIA
>                        ^
> /usr/include/net/if.h:77:5: note: previous definition is here
>    IFF_AUTOMEDIA = 0x4000,     /* Auto media select active.  */
>    ^
> In file included from /yeti/vm/ubuntu-initrd-16.04-x86_64-build/dpdk/drivers/net/tap/rte_eth_tap.c:50:
> /usr/include/linux/if.h:86:2: error: redefinition of enumerator 'IFF_DYNAMIC'
>        IFF_DYNAMIC                     = 1<<15, /* sysfs */
>        ^
> /usr/include/net/if.h:80:22: note: expanded from macro 'IFF_DYNAMIC'
> # define IFF_DYNAMIC    IFF_DYNAMIC
>                        ^
> /usr/include/net/if.h:79:5: note: previous definition is here
>    IFF_DYNAMIC = 0x8000        /* Dialup device with changing addresses.  */
>    ^
> In file included from /yeti/vm/ubuntu-initrd-16.04-x86_64-build/dpdk/drivers/net/tap/rte_eth_tap.c:50:
> /usr/include/linux/if.h:169:8: error: redefinition of 'ifmap'
> struct ifmap {
>       ^
> /usr/include/net/if.h:111:8: note: previous definition is here
> struct ifmap
>       ^
> In file included from /yeti/vm/ubuntu-initrd-16.04-x86_64-build/dpdk/drivers/net/tap/rte_eth_tap.c:50:
> /usr/include/linux/if.h:203:8: error: redefinition of 'ifreq'
> struct ifreq {
>       ^
> /usr/include/net/if.h:126:8: note: previous definition is here
> struct ifreq
>       ^
> In file included from /yeti/vm/ubuntu-initrd-16.04-x86_64-build/dpdk/drivers/net/tap/rte_eth_tap.c:50:
> /usr/include/linux/if.h:252:8: error: redefinition of 'ifconf'
> struct ifconf  {
>       ^
> /usr/include/net/if.h:176:8: note: previous definition is here
> struct ifconf
>       ^
> 19 errors generated.
> make[6]: *** [rte_eth_tap.o] Error 1
> make[5]: *** [tap] Error 2
> make[5]: *** Waiting for unfinished jobs....
> make[4]: *** [net] Error 2
> make[3]: *** [drivers] Error 2
> make[2]: *** [all] Error 2
> make[1]: *** [pre_install] Error 2
> make: *** [install] Error 2
> error: build failed

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [PATCH v2] drivers/net:new PMD using tun/tap host interface
  2016-09-18 16:20     ` Wiles, Keith
@ 2016-09-19  0:29       ` Yuanhan Liu
  2016-09-19 15:56         ` Wiles, Keith
  0 siblings, 1 reply; 59+ messages in thread
From: Yuanhan Liu @ 2016-09-19  0:29 UTC (permalink / raw)
  To: Wiles, Keith; +Cc: dev, pmatilai

On Sun, Sep 18, 2016 at 04:20:12PM +0000, Wiles, Keith wrote:
> 
> Regards,
> Keith
> 
> > On Sep 18, 2016, at 8:25 AM, Yuanhan Liu <yuanhan.liu@linux.intel.com> wrote:
> > 
> > On Fri, Sep 16, 2016 at 11:22:13AM -0500, Keith Wiles wrote:
> >> The rte_eth_tap.c PMD creates a device using TUN/TAP interfaces
> >> on the local host. The PMD allows for DPDK and the host to
> >> communicate using a raw device interface on the host and in
> >> the DPDK application. The device created is a Tap device with
> >> a L2 packet header.
> >> 
> >> v2 - merge all of the patches into one patch.
> >>     Fix a typo on naming the tap device.
> >>     Update the maintainers list
> >> 
> >> Signed-off-by: Keith Wiles <keith.wiles@intel.com>
> > 
> > Hi,
> > 
> > FYI, my robot caught some build errors with this patch applied.
> 
> All of the below errors are from Linux header files and not the Tap driver.

Yes, but you are referencing them, so ...

> How are you building DPDK, which config file are you using, OS version, .... Some details would be nice to understand how these errors are being generated and how I can recreate them.

My bad. I should have included all of them. My robot is still at a very
rough stage; that info should be included automatically in the future (sadly,
I don't even have time to enhance it).

However, for this issue, I think it can be triggered on most (if not any)
linux platforms: I got the report from ubuntu-16.04, and I have just tried
it on my dev box, which is fedora 20: I got the same error.

	--yliu

> > ---
> > In file included from /yeti/vm/ubuntu-initrd-16.04-x86_64-build/dpdk/drivers/net/tap/rte_eth_tap.c:50:
> > /usr/include/linux/if.h:71:2: error: redefinition of enumerator 'IFF_UP'
> >        IFF_UP                          = 1<<0,  /* sysfs */
> >        ^
> > /usr/include/net/if.h:45:17: note: expanded from macro 'IFF_UP'
> > # define IFF_UP IFF_UP
> >                ^

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [PATCH v2] drivers/net:new PMD using tun/tap host interface
  2016-09-19  0:29       ` Yuanhan Liu
@ 2016-09-19 15:56         ` Wiles, Keith
  2016-09-20  3:54           ` Yuanhan Liu
  0 siblings, 1 reply; 59+ messages in thread
From: Wiles, Keith @ 2016-09-19 15:56 UTC (permalink / raw)
  To: Yuanhan Liu; +Cc: dev, pmatilai


Sent from my iPhone

On Sep 18, 2016, at 7:28 PM, Yuanhan Liu <yuanhan.liu@linux.intel.com<mailto:yuanhan.liu@linux.intel.com>> wrote:

On Sun, Sep 18, 2016 at 04:20:12PM +0000, Wiles, Keith wrote:

Regards,
Keith

On Sep 18, 2016, at 8:25 AM, Yuanhan Liu <yuanhan.liu@linux.intel.com<mailto:yuanhan.liu@linux.intel.com>> wrote:

On Fri, Sep 16, 2016 at 11:22:13AM -0500, Keith Wiles wrote:
The rte_eth_tap.c PMD creates a device using TUN/TAP interfaces
on the local host. The PMD allows for DPDK and the host to
communicate using a raw device interface on the host and in
the DPDK application. The device created is a Tap device with
a L2 packet header.

v2 - merge all of the patches into one patch.
   Fix a typo on naming the tap device.
   Update the maintainers list

Signed-off-by: Keith Wiles <keith.wiles@intel.com<mailto:keith.wiles@intel.com>>

Hi,

FYI, my robot caught some build errors with this patch applied.

All of the below errors are from Linux header files and not the Tap driver.

Yes, but you are referencing them, so ...

Yes I am referencing them, but still they are not interacting with the tap driver. It would seem like you could delete all of the code in the tap driver and still get these errors. Which you could try if you want, just ifdef the code and see if it still happens.

On my ubuntu 16.04 machine I do not get these errors. I need to know how to reproduce the failure to fix it.



How are you building DPDK, which config file are you using, OS version, .... Some details would be nice to understand how these errors are being generated and how I can recreate them.

My bad. I should have included all of them: my robot is still at a very
rough stage: those info should be included automatically in future (badly,
I don't even have time to enhance it).

However, for this issue, I think it can be triggered on most (if not any)
linux platforms: I got the report from ubuntu-16.04, and I have just tried
it on my dev box, which is fedora 20: I got the same error.

   --yliu

---
In file included from /yeti/vm/ubuntu-initrd-16.04-x86_64-build/dpdk/drivers/net/tap/rte_eth_tap.c:50:
/usr/include/linux/if.h:71:2: error: redefinition of enumerator 'IFF_UP'
      IFF_UP                          = 1<<0,  /* sysfs */
      ^
/usr/include/net/if.h:45:17: note: expanded from macro 'IFF_UP'
# define IFF_UP IFF_UP
              ^

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [PATCH v2] drivers/net:new PMD using tun/tap host interface
  2016-09-19 15:56         ` Wiles, Keith
@ 2016-09-20  3:54           ` Yuanhan Liu
  0 siblings, 0 replies; 59+ messages in thread
From: Yuanhan Liu @ 2016-09-20  3:54 UTC (permalink / raw)
  To: Wiles, Keith; +Cc: dev, pmatilai

On Mon, Sep 19, 2016 at 03:56:03PM +0000, Wiles, Keith wrote:
>         All of the below errors are from Linux header files and not the Tap
>         driver.
> 
>    
>     Yes, but you are referencing them, so ...
> 
> 
> Yes I am referencing them, but still they are not interacting with the tap
> driver.

Yes, they are not. But the tap driver code references them. I mean, you
might have done that wrongly; e.g., the build passes if I remove a few
includes of Linux header files:

    diff --git a/drivers/net/tap/rte_eth_tap.c b/drivers/net/tap/rte_eth_tap.c
    index 4621c55..2a6bcd0 100644
    --- a/drivers/net/tap/rte_eth_tap.c
    +++ b/drivers/net/tap/rte_eth_tap.c
    @@ -47,9 +47,7 @@
     #include <arpa/inet.h>
     #include <net/if.h>
     #ifdef __linux__
    -#include <linux/if.h>
     #include <linux/if_tun.h>
    -#include <linux/if_ether.h>
     #else
     #include <netinet/if_ether.h>
     #endif

> It would seem like you could delete all of the code in the tap driver
> and still get these errors. Which you could try if you want, just ifdef the
> code and see if it still happens. 
> 
> On my ubuntu 16.04 machine I do not get these errors. I need to know how to
> reproduce the failure to fix it. 

Interesting! I really did no magic to reproduce it. As said, I can
reproduce it pretty easily with ubuntu 16.04 and fedora 20, just with
default build:

    $ make install T=$RTE_SDK

Anyway, the error is straightforward after all: both files (net/if.h and
linux/if.h) define the same structures and macros. Including both of them
results in errors like the ones I reported.

	--yliu

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [PATCH v2] drivers/net:new PMD using tun/tap host interface
  2016-09-16 16:22 ` [PATCH v2] drivers/net:new PMD using tun/tap host interface Keith Wiles
  2016-09-18 13:25   ` Yuanhan Liu
@ 2016-09-20  4:05   ` Yuanhan Liu
  2016-09-21  1:32     ` Wiles, Keith
  1 sibling, 1 reply; 59+ messages in thread
From: Yuanhan Liu @ 2016-09-20  4:05 UTC (permalink / raw)
  To: Keith Wiles; +Cc: dev, pmatilai

[ just got few more comments after some fiddling with the build issue ]

On Fri, Sep 16, 2016 at 11:22:13AM -0500, Keith Wiles wrote:
> diff --git a/drivers/net/tap/Makefile b/drivers/net/tap/Makefile
> new file mode 100644
> index 0000000..442a2fe
> --- /dev/null
> +++ b/drivers/net/tap/Makefile
> @@ -0,0 +1,60 @@
> +#   BSD LICENSE
> +#
> +#   Copyright(c) 2014 John W. Linville <linville@redhat.com>
> +#   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
> +#   Copyright(c) 2014 6WIND S.A.

Just wondering, is it on purpose, or just yet another copy-paste error?

> +#include <arpa/inet.h>
> +#include <net/if.h>
> +#ifdef __linux__

So, you meant to add other OS support, say BSD?

> +#include <linux/if.h>
> +#include <linux/if_tun.h>
> +#include <linux/if_ether.h>
> +#else
> +#include <netinet/if_ether.h>
> +#endif
> +#include <fcntl.h>
> +
> +#include <poll.h>
> +
> +/* Linux based path to the TUN device */
> +#define TUN_TAP_DEV_PATH        "/dev/net/tun"

However, you hardcoded a Linux-only path here. While checking the code
from qemu, I saw that the path actually differs between UNIX variants,
even for FreeBSD and NetBSD.

    [yliu@yliu-dev ~/qemu]$ grep -E "/dev/.*(tap|tun)" net/tap*.c
    net/tap-bsd.c:            snprintf(dname, sizeof dname, "/dev/tun%d", i);
    net/tap-bsd.c:            snprintf(dname, sizeof dname, "/dev/tap%d", i);
    net/tap-bsd.c:#define PATH_NET_TAP "/dev/tap"
    net/tap-linux.c:#define PATH_NET_TUN "/dev/net/tun"
    net/tap-solaris.c:    TFR(tap_fd = open("/dev/tap", O_RDWR, 0));
    ...

	--yliu

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [PATCH v2] drivers/net:new PMD using tun/tap host interface
  2016-09-20  4:05   ` Yuanhan Liu
@ 2016-09-21  1:32     ` Wiles, Keith
  2016-09-21  2:13       ` Yuanhan Liu
  0 siblings, 1 reply; 59+ messages in thread
From: Wiles, Keith @ 2016-09-21  1:32 UTC (permalink / raw)
  To: Yuanhan Liu; +Cc: dev, pmatilai


Regards,
Keith

> On Sep 20, 2016, at 12:05 AM, Yuanhan Liu <yuanhan.liu@linux.intel.com> wrote:
> 
> [ just got few more comments after some fiddling with the build issue ]
> 
> On Fri, Sep 16, 2016 at 11:22:13AM -0500, Keith Wiles wrote:
>> diff --git a/drivers/net/tap/Makefile b/drivers/net/tap/Makefile
>> new file mode 100644
>> index 0000000..442a2fe
>> --- /dev/null
>> +++ b/drivers/net/tap/Makefile
>> @@ -0,0 +1,60 @@
>> +#   BSD LICENSE
>> +#
>> +#   Copyright(c) 2014 John W. Linville <linville@redhat.com>
>> +#   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
>> +#   Copyright(c) 2014 6WIND S.A.
> 
> Just wondering, is it on purpose, or just yet another copy-paste error?

Yes, I will send another patch to fix this one.

> 
>> +#include <arpa/inet.h>
>> +#include <net/if.h>
>> +#ifdef __linux__
> 
> So, you meant to add other OS support, say BSD?

I did not mean to support BSD, just Linux and I will remove the ifdef to reflect it. If you look at the configuration file I only added it to the common_linux file and not BSD.

The concern I am having is on my standard Ubuntu 16.04 system these errors do not appear. I would like to understand why they appeared on your system. The ifdef __linux__ must be enabled as the defines in the linux/if_tun.h file do not give an error. I did play with the headers already and removed the ifdef, but as I could not reproduce your build failure it did not trigger anything new.

I will look at it some more, but it does not make sense and I do want to make sure it works.

> 
>> +#include <linux/if.h>
>> +#include <linux/if_tun.h>
>> +#include <linux/if_ether.h>
>> +#else
>> +#include <netinet/if_ether.h>
>> +#endif
>> +#include <fcntl.h>
>> +
>> +#include <poll.h>
>> +
>> +/* Linux based path to the TUN device */
>> +#define TUN_TAP_DEV_PATH        "/dev/net/tun"
> 
> However, you hardcoded a linux only path here. While checking the code
> from qemu, I saw that the path is actually different from different UNIX
> variants, even for FreeBSD and NetBSD.

I only intended this to work on Linux and not FreeBSD/NetBSD, as the handling of opens on the tun device is different than on Linux. I also only added it to the common_linux configuration file.

> 
>    [yliu@yliu-dev ~/qemu]$ grep -E "/dev/.*(tap|tun)" net/tap*.c
>    net/tap-bsd.c:            snprintf(dname, sizeof dname, "/dev/tun%d", i);
>    net/tap-bsd.c:            snprintf(dname, sizeof dname, "/dev/tap%d", i);
>    net/tap-bsd.c:#define PATH_NET_TAP "/dev/tap"
>    net/tap-linux.c:#define PATH_NET_TUN "/dev/net/tun"
>    net/tap-solaris.c:    TFR(tap_fd = open("/dev/tap", O_RDWR, 0));
>    ...
> 
> 	--yliu

^ permalink raw reply	[flat|nested] 59+ messages in thread

* [PATCH v3] drivers/net:new PMD using tun/tap host interface
  2016-09-15 14:10 ` [PATCH 2/3] docs:tun/tap PMD information Keith Wiles
  2016-09-15 14:13   ` Wiles, Keith
@ 2016-09-21  2:00   ` Keith Wiles
  2016-10-04 14:45     ` [PATCH v4] " Keith Wiles
                       ` (4 more replies)
  1 sibling, 5 replies; 59+ messages in thread
From: Keith Wiles @ 2016-09-21  2:00 UTC (permalink / raw)
  To: dev; +Cc: pmatilai, yuanhan.liu

The rte_eth_tap.c PMD creates a device using TUN/TAP interfaces
on the local host. The PMD allows for DPDK and the host to
communicate using a raw device interface on the host and in
the DPDK application. The device created is a Tap device with
a L2 packet header.

v3 - fix includes by removing the ifdef for OS types other than Linux.
     Fix the copyright notice in the Makefile
v2 - merge all of the patches into one patch.
     Fix a typo on naming the tap device.
     Update the maintainers list

Signed-off-by: Keith Wiles <keith.wiles@intel.com>
---
 MAINTAINERS                             |   5 +
 config/common_linuxapp                  |   2 +
 doc/guides/nics/tap.rst                 |  84 ++++
 drivers/net/Makefile                    |   1 +
 drivers/net/tap/Makefile                |  57 +++
 drivers/net/tap/rte_eth_tap.c           | 867 ++++++++++++++++++++++++++++++++
 drivers/net/tap/rte_pmd_tap_version.map |   4 +
 mk/rte.app.mk                           |   1 +
 8 files changed, 1021 insertions(+)
 create mode 100644 doc/guides/nics/tap.rst
 create mode 100644 drivers/net/tap/Makefile
 create mode 100644 drivers/net/tap/rte_eth_tap.c
 create mode 100644 drivers/net/tap/rte_pmd_tap_version.map

diff --git a/MAINTAINERS b/MAINTAINERS
index 7c33ad4..fad74e4 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -392,6 +392,11 @@ F: doc/guides/nics/pcap_ring.rst
 F: app/test/test_pmd_ring.c
 F: app/test/test_pmd_ring_perf.c
 
+Tap PMD
+M: Keith Wiles <keith.wiles@intel.com>
+F: drivers/net/tap
+F: doc/guides/nics/tap.rst
+
 Null Networking PMD
 M: Tetsuya Mukawa <mtetsuyah@gmail.com>
 F: drivers/net/null/
diff --git a/config/common_linuxapp b/config/common_linuxapp
index 2483dfa..59a2053 100644
--- a/config/common_linuxapp
+++ b/config/common_linuxapp
@@ -44,3 +44,5 @@ CONFIG_RTE_LIBRTE_PMD_VHOST=y
 CONFIG_RTE_LIBRTE_PMD_AF_PACKET=y
 CONFIG_RTE_LIBRTE_POWER=y
 CONFIG_RTE_VIRTIO_USER=y
+CONFIG_RTE_LIBRTE_PMD_TAP=y
+CONFIG_RTE_PMD_TAP_MAX_QUEUES=32
diff --git a/doc/guides/nics/tap.rst b/doc/guides/nics/tap.rst
new file mode 100644
index 0000000..072def8
--- /dev/null
+++ b/doc/guides/nics/tap.rst
@@ -0,0 +1,84 @@
+..  BSD LICENSE
+    Copyright(c) 2016 Intel Corporation. All rights reserved.
+    All rights reserved.
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions
+    are met:
+
+    * Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+    notice, this list of conditions and the following disclaimer in
+    the documentation and/or other materials provided with the
+    distribution.
+    * Neither the name of Intel Corporation nor the names of its
+    contributors may be used to endorse or promote products derived
+    from this software without specific prior written permission.
+
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+Tun/Tap Poll Mode Driver
+========================================
+
+The rte_eth_tap.c PMD creates a device using TUN/TAP interfaces on the local
+host. The PMD allows for DPDK and the host to communicate using a raw device
+interface on the host and in the DPDK application.
+
+The device created is a TAP device, which sends/receives packets in a raw format
+with an L2 header. The TAP PMD is used for connectivity to the local host
+through a TAP interface. When the TAP PMD is initialized it creates a number
+of tap devices in the host, visible via the 'ifconfig -a' or 'ip' commands. These
+commands can be used to configure and query the virtual device.
+
+These TAP interfaces can be used with wireshark, tcpdump or Pktgen-DPDK, and can
+also serve as a network connection to the DPDK application. To enable one or
+more interfaces, use the --vdev=eth_tap option on the DPDK application command
+line. Each --vdev=eth_tap option given will create an interface named dtap0,
+dtap1, ... and so forth.
+
+.. code-block:: console
+
+   The interface name can be changed by adding the iface=foo0 option,
+   e.g. --vdev=eth_tap,iface=foo0 --vdev=eth_tap,iface=foo1, ...
+
+.. code-block:: console
+
+   Also the speed of the interface can be changed from 10G to whatever number
+   needed, but the interface does not enforce that speed.
+   e.g. --vdev=eth_tap,iface=foo0,speed=25000
+
+After the DPDK application is started you can send and receive packets on the
+interface using the standard rx_burst/tx_burst APIs in DPDK. From the host point
+of view you can use any host tool like tcpdump, wireshark, ping, Pktgen and
+others to communicate with the DPDK application. The DPDK application may not
+understand network protocols like IPv4/6, UDP or TCP unless the application has
+been written to understand these protocols.
+
+If you need the interface to act as a real network interface, meaning it is up
+and has a valid IP address, you can do this with the following commands:
+
+.. code-block:: console
+
+   sudo ip link set dtap0 up; sudo ip addr add 192.168.0.250/24 dev dtap0
+   sudo ip link set dtap1 up; sudo ip addr add 192.168.1.250/24 dev dtap1
+
+Please change the IP addresses as you see fit.
+
+If routing is enabled on the host you can also communicate with the DPDK App
+over the internet via a standard socket layer application as long as you account
+for the protocol handling in the application.
+
+If you have a Network Stack in your DPDK application or something like it you
+can utilize that stack to handle the network protocols. Plus you would be able
+to address the interface using an IP address assigned to the internal interface.
diff --git a/drivers/net/Makefile b/drivers/net/Makefile
index bc93230..b4afa98 100644
--- a/drivers/net/Makefile
+++ b/drivers/net/Makefile
@@ -55,6 +55,7 @@ DIRS-$(CONFIG_RTE_LIBRTE_THUNDERX_NICVF_PMD) += thunderx
 DIRS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio
 DIRS-$(CONFIG_RTE_LIBRTE_VMXNET3_PMD) += vmxnet3
 DIRS-$(CONFIG_RTE_LIBRTE_PMD_XENVIRT) += xenvirt
+DIRS-$(CONFIG_RTE_LIBRTE_PMD_TAP) += tap
 
 ifeq ($(CONFIG_RTE_LIBRTE_VHOST),y)
 DIRS-$(CONFIG_RTE_LIBRTE_PMD_VHOST) += vhost
diff --git a/drivers/net/tap/Makefile b/drivers/net/tap/Makefile
new file mode 100644
index 0000000..e18f30c
--- /dev/null
+++ b/drivers/net/tap/Makefile
@@ -0,0 +1,57 @@
+#   BSD LICENSE
+#
+#   Copyright(c) 2016 Intel Corporation. All rights reserved.
+#
+#   Redistribution and use in source and binary forms, with or without
+#   modification, are permitted provided that the following conditions
+#   are met:
+#
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice, this list of conditions and the following disclaimer in
+#       the documentation and/or other materials provided with the
+#       distribution.
+#     * Neither the name of Intel Corporation nor the names of its
+#       contributors may be used to endorse or promote products derived
+#       from this software without specific prior written permission.
+#
+#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+#   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+#   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+#   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+#   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+#
+# library name
+#
+LIB = librte_pmd_tap.a
+
+EXPORT_MAP := rte_pmd_tap_version.map
+
+LIBABIVER := 1
+
+CFLAGS += -O3
+CFLAGS += $(WERROR_FLAGS)
+
+#
+# all source are stored in SRCS-y
+#
+SRCS-$(CONFIG_RTE_LIBRTE_PMD_TAP) += rte_eth_tap.c
+
+# this lib depends upon:
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_TAP) += lib/librte_eal
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_TAP) += lib/librte_mbuf
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_TAP) += lib/librte_mempool
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_TAP) += lib/librte_ether
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_TAP) += lib/librte_kvargs
+
+include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/drivers/net/tap/rte_eth_tap.c b/drivers/net/tap/rte_eth_tap.c
new file mode 100644
index 0000000..e60844b
--- /dev/null
+++ b/drivers/net/tap/rte_eth_tap.c
@@ -0,0 +1,867 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2016 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <rte_mbuf.h>
+#include <rte_ethdev.h>
+#include <rte_malloc.h>
+#include <rte_kvargs.h>
+#include <rte_dev.h>
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/socket.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <unistd.h>
+#include <poll.h>
+#include <arpa/inet.h>
+#include <linux/if.h>
+#include <linux/if_tun.h>
+#include <linux/if_ether.h>
+#include <fcntl.h>
+
+#include <poll.h>
+
+/* Linux based path to the TUN device */
+#define TUN_TAP_DEV_PATH        "/dev/net/tun"
+
+/* Names of the key=value arguments accepted on the --vdev command line */
+#define ETH_TAP_IFACE_ARG       "iface"
+#define ETH_TAP_SPEED_ARG       "speed"
+
+/* NULL-terminated list of accepted keys, handed to rte_kvargs_parse() */
+static const char *valid_arguments[] = {
+	ETH_TAP_IFACE_ARG,
+	ETH_TAP_SPEED_ARG,
+	NULL
+};
+
+/* Driver name reported through dev_info and rte_eth_dev_data */
+static const char *drivername = "Tap PMD";
+/* Running unit number used to build the default "dtap<N>" interface name */
+static int tap_unit = 0;
+
+/*
+ * Link template copied into every new device; link_speed is overwritten
+ * with the "speed" argument (or the 10G default) before it is copied.
+ */
+static struct rte_eth_link pmd_link = {
+	.link_speed = ETH_SPEED_NUM_10G,
+	.link_duplex = ETH_LINK_FULL_DUPLEX,
+	.link_status = ETH_LINK_DOWN,
+	.link_autoneg = ETH_LINK_SPEED_AUTONEG
+};
+
+/* Creation parameters collected from the vdev arguments */
+struct tap_info {
+	char name[RTE_ETH_NAME_MAX_LEN]; /* Interface name supplied/given */
+	int speed;			 /* Speed of interface */
+};
+
+/* Per-queue software counters maintained by the burst functions */
+struct pkt_stats {
+	uint64_t opackets;		/* Number of output packets */
+	uint64_t ipackets;		/* Number of input packets */
+	uint64_t obytes;		/* Number of bytes on output */
+	uint64_t ibytes;		/* Number of bytes on input */
+	uint64_t errs;			/* Number of error packets */
+};
+
+/* State of one receive queue; passed as the opaque queue to pmd_rx_burst() */
+struct rx_queue {
+	struct rte_mempool *mp;		/* Mempool for RX packets */
+	uint16_t in_port;		/* Port ID */
+	int fd;				/* TAP file descriptor, -1 when unset */
+
+	struct pkt_stats stats;		/* Stats for this RX queue */
+};
+
+/* State of one transmit queue; passed as the opaque queue to pmd_tx_burst() */
+struct tx_queue {
+	int fd;				/* TAP file descriptor, -1 when unset */
+	struct pkt_stats stats;		/* Stats for this TX queue */
+};
+
+/* Driver private data hung off rte_eth_dev_data.dev_private */
+struct pmd_internals {
+	char name[RTE_ETH_NAME_MAX_LEN];	/* Internal Tap device name */
+	uint16_t nb_queues;			/* Number of queues supported */
+	uint16_t pad0;
+	struct ether_addr eth_addr;	/* Mac address of the device port */
+
+	int if_index;			/* IF_INDEX for the port */
+	int fds[RTE_PMD_TAP_MAX_QUEUES]; /* List of all file descriptors */
+
+	struct rx_queue rxq[RTE_PMD_TAP_MAX_QUEUES];	/* List of RX queues */
+	struct tx_queue txq[RTE_PMD_TAP_MAX_QUEUES];	/* List of TX queues */
+};
+
+/*
+ * Tun/Tap allocation routine
+ *
+ * Opens the TUN/TAP clone device and attaches the new descriptor to a TAP
+ * interface (no packet-info header).
+ *
+ * name is the name of the interface to use; when it is NULL or empty the
+ * host supplies the name. On success a non-NULL name is overwritten with the
+ * name the kernel actually assigned, so the caller's buffer must be at least
+ * IFNAMSIZ bytes long.
+ *
+ * Returns the non-blocking file descriptor on success, -1 on failure.
+ */
+static int
+tun_alloc(char *name)
+{
+	struct ifreq ifr;
+	unsigned int features;
+	int fd;
+
+	memset(&ifr, 0, sizeof(struct ifreq));
+
+	ifr.ifr_flags = IFF_TAP | IFF_NO_PI;
+	/* snprintf always terminates, unlike strncpy with a full-size bound */
+	if (name && name[0])
+		snprintf(ifr.ifr_name, IFNAMSIZ, "%s", name);
+
+	fd = open(TUN_TAP_DEV_PATH, O_RDWR);
+	if (fd < 0) {
+		RTE_LOG(ERR, PMD, "Unable to create TAP interface\n");
+		goto error;
+	}
+
+	/* Grab the TUN features to verify we can work */
+	if (ioctl(fd, TUNGETFEATURES, &features) < 0) {
+		RTE_LOG(ERR, PMD, "Unable to get TUN/TAP features\n");
+		goto error;
+	}
+	RTE_LOG(DEBUG, PMD, "TUN/TAP Features %08x\n", features);
+
+	if (!(features & IFF_MULTI_QUEUE) && (RTE_PMD_TAP_MAX_QUEUES > 1)) {
+		/* This aborts the allocation, so report it as an error */
+		RTE_LOG(ERR, PMD, "TUN/TAP device only one queue\n");
+		goto error;
+	} else if ((features & IFF_ONE_QUEUE) && (RTE_PMD_TAP_MAX_QUEUES == 1)) {
+		ifr.ifr_flags |= IFF_ONE_QUEUE;
+		RTE_LOG(DEBUG, PMD, "Single queue only support\n");
+	} else {
+		ifr.ifr_flags |= IFF_MULTI_QUEUE;
+		RTE_LOG(DEBUG, PMD, "Multi-queue support for %d queues\n",
+			RTE_PMD_TAP_MAX_QUEUES);
+	}
+
+	/* Set the TUN/TAP configuration and get the name if needed */
+	if (ioctl(fd, TUNSETIFF, (void *)&ifr) < 0) {
+		RTE_LOG(ERR, PMD, "Unable to set TUNSETIFF for %s\n", ifr.ifr_name);
+		perror("TUNSETIFF");
+		goto error;
+	}
+
+	/* Always set the file descriptor to non-blocking */
+	if (fcntl(fd, F_SETFL, O_NONBLOCK) < 0) {
+		RTE_LOG(ERR, PMD, "Unable to set to nonblocking\n");
+		perror("F_SETFL, NONBLOCK");
+		goto error;
+	}
+
+	/* If the kernel picked a different name, hand it back to the caller */
+	if (name && strcmp(name, ifr.ifr_name))
+		snprintf(name, IFNAMSIZ, "%s", ifr.ifr_name);
+
+	return fd;
+
+error:
+	/* 0 is a valid descriptor, only a negative value means "not opened" */
+	if (fd >= 0)
+		close(fd);
+	return -1;
+}
+
+/*
+ * RX burst callback: polls the queue's TAP descriptor without blocking and
+ * reads one frame per freshly allocated mbuf until nb_pkts frames have been
+ * stored in bufs or nothing more can be read. Returns the number of mbufs
+ * stored and adds them to the queue's input counters.
+ */
+static uint16_t
+pmd_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
+{
+	struct rx_queue *rxq = queue;
+	struct pollfd pfd = { .fd = rxq->fd, .events = POLLIN };
+	unsigned long rx_bytes = 0;
+	uint16_t rx_cnt = 0;
+
+	while (rx_cnt < nb_pkts) {
+		struct rte_mbuf *m;
+		int len;
+
+		/* zero timeout: never block the polling thread */
+		if (poll(&pfd, 1, 0) <= 0)
+			break;
+
+		if (pfd.revents == 0)
+			continue;
+
+		if (pfd.revents & POLLERR) {
+			rxq->stats.errs++;
+			RTE_LOG(ERR, PMD, "Packet Error\n");
+			break;
+		}
+		if (pfd.revents & POLLHUP)
+			RTE_LOG(ERR, PMD, "Peer closed connection\n");
+
+		/* allocate the next mbuf */
+		m = rte_pktmbuf_alloc(rxq->mp);
+		if (unlikely(m == NULL)) {
+			RTE_LOG(ERR, PMD, "Unable to allocate mbuf\n");
+			break;
+		}
+
+		/* one read() returns one frame, straight into the mbuf data */
+		len = read(pfd.fd, rte_pktmbuf_mtod(m, char *),
+			   rte_pktmbuf_tailroom(m));
+		if (len <= 0) {
+			RTE_LOG(ERR, PMD, "len %d\n", len);
+			rte_pktmbuf_free(m);
+			break;
+		}
+
+		m->data_len = len;
+		m->pkt_len = len;
+		m->port = rxq->in_port;
+
+		/* account for the receive frame */
+		bufs[rx_cnt++] = m;
+		rx_bytes += m->pkt_len;
+	}
+
+	rxq->stats.ipackets += rx_cnt;
+	rxq->stats.ibytes += rx_bytes;
+
+	return rx_cnt;
+}
+
+/*
+ * TX burst callback: writes the packets in bufs, in order, to the queue's TAP
+ * descriptor and frees each mbuf that was written. Stops at the first poll or
+ * write failure. Returns the number of packets written; packets that were not
+ * written are added to the queue's error counter.
+ */
+static uint16_t
+pmd_tx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
+{
+	struct tx_queue *txq = queue;
+	struct pollfd pfd;
+	struct rte_mbuf *m;
+	unsigned long tx_bytes = 0;
+	uint16_t sent = 0;
+	int attempt, n;
+
+	if (unlikely(nb_pkts == 0))
+		return 0;
+
+	pfd.fd = txq->fd;
+	pfd.events = POLLOUT;
+
+	for (attempt = 0; attempt < nb_pkts; attempt++) {
+		n = poll(&pfd, 1, 0);
+		if (n <= 0)
+			break;
+
+		/* not writable right now: spend this attempt and poll again */
+		if (!(pfd.revents & POLLOUT))
+			continue;
+
+		/* copy the tx frame data */
+		m = bufs[sent];
+		n = write(pfd.fd, rte_pktmbuf_mtod(m, void*),
+			  rte_pktmbuf_pkt_len(m));
+		if (n <= 0)
+			break;
+
+		sent++;
+		tx_bytes += m->pkt_len;
+		rte_pktmbuf_free(m);
+	}
+
+	txq->stats.opackets += sent;
+	txq->stats.errs += nb_pkts - sent;
+	txq->stats.obytes += tx_bytes;
+
+	return sent;
+}
+
+/*
+ * Device start callback: only marks the link as up. Always returns 0.
+ */
+static int
+tap_dev_start(struct rte_eth_dev *dev)
+{
+	struct rte_eth_link *link = &dev->data->dev_link;
+
+	/* Force the Link up */
+	link->link_status = ETH_LINK_UP;
+
+	return 0;
+}
+
+/*
+ * This function gets called when the current port gets stopped.
+ *
+ * Closes every descriptor recorded in internals->fds[] and marks the link
+ * down. Each closed slot is reset to -1, together with the matching RX/TX
+ * queue entries, so that the queue release callbacks and
+ * pmd_tap_devuninit() do not close the same descriptor a second time.
+ */
+static void
+tap_dev_stop(struct rte_eth_dev *dev)
+{
+	int i;
+	struct pmd_internals *internals = dev->data->dev_private;
+
+	for (i = 0; i < internals->nb_queues; i++) {
+		if (internals->fds[i] == -1)
+			continue;
+
+		close(internals->fds[i]);
+
+		/* queue setup stores this same fd in the rx and tx queue */
+		internals->fds[i] = -1;
+		internals->rxq[i].fd = -1;
+		internals->txq[i].fd = -1;
+	}
+
+	dev->data->dev_link.link_status = ETH_LINK_DOWN;
+}
+
+/*
+ * Device configure callback. Nothing is configured here; it always
+ * returns 0.
+ */
+static int
+tap_dev_configure(struct rte_eth_dev *dev __rte_unused)
+{
+	return 0;
+}
+
+/*
+ * Device info callback: fills dev_info with the driver name, the interface
+ * index, and the queue and frame-size limits of this port.
+ */
+static void
+tap_dev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
+{
+	const struct pmd_internals *priv = dev->data->dev_private;
+	const uint16_t max_queues = (uint16_t)priv->nb_queues;
+
+	dev_info->driver_name = drivername;
+	dev_info->pci_dev = NULL;
+	dev_info->if_index = priv->if_index;
+
+	dev_info->max_mac_addrs = 1;
+	dev_info->min_rx_bufsize = 0;
+	dev_info->max_rx_pktlen = (uint32_t)ETHER_MAX_VLAN_FRAME_LEN;
+
+	/* the same queue count applies to both directions */
+	dev_info->max_rx_queues = max_queues;
+	dev_info->max_tx_queues = max_queues;
+}
+
+/*
+ * Stats callback: copies the per-queue software counters into igb_stats
+ * (limited to RTE_ETHDEV_QUEUE_STAT_CNTRS queues) and stores their sums in
+ * the port-level totals.
+ */
+static void
+tap_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *igb_stats)
+{
+	const struct pmd_internals *internal = dev->data->dev_private;
+	unsigned long in_pkts = 0, in_bytes = 0;
+	unsigned long out_pkts = 0, out_bytes = 0, out_errs = 0;
+	unsigned q, nb_stat_queues;
+
+	/* the per-queue arrays in rte_eth_stats have a fixed size */
+	nb_stat_queues = internal->nb_queues;
+	if (nb_stat_queues > RTE_ETHDEV_QUEUE_STAT_CNTRS)
+		nb_stat_queues = RTE_ETHDEV_QUEUE_STAT_CNTRS;
+
+	for (q = 0; q < nb_stat_queues; q++) {
+		const struct pkt_stats *rx = &internal->rxq[q].stats;
+		const struct pkt_stats *tx = &internal->txq[q].stats;
+
+		igb_stats->q_ipackets[q] = rx->ipackets;
+		igb_stats->q_ibytes[q] = rx->ibytes;
+		in_pkts += igb_stats->q_ipackets[q];
+		in_bytes += igb_stats->q_ibytes[q];
+
+		igb_stats->q_opackets[q] = tx->opackets;
+		igb_stats->q_errors[q] = tx->errs;
+		igb_stats->q_obytes[q] = tx->obytes;
+		out_pkts += igb_stats->q_opackets[q];
+		out_errs += igb_stats->q_errors[q];
+		out_bytes += igb_stats->q_obytes[q];
+	}
+
+	igb_stats->ipackets = in_pkts;
+	igb_stats->ibytes = in_bytes;
+	igb_stats->opackets = out_pkts;
+	igb_stats->oerrors = out_errs;
+	igb_stats->obytes = out_bytes;
+}
+
+/*
+ * Stats reset callback: zeroes all software counters of every RX and TX
+ * queue, including the RX error counter that pmd_rx_burst() increments.
+ */
+static void
+tap_stats_reset(struct rte_eth_dev *dev)
+{
+	int i;
+	struct pmd_internals *internal = dev->data->dev_private;
+
+	for (i = 0; i < internal->nb_queues; i++) {
+		memset(&internal->rxq[i].stats, 0,
+		       sizeof(internal->rxq[i].stats));
+		memset(&internal->txq[i].stats, 0,
+		       sizeof(internal->txq[i].stats));
+	}
+}
+
+/*
+ * Device close callback. Intentionally empty: no work is done here.
+ */
+static void
+tap_dev_close(struct rte_eth_dev *dev __rte_unused)
+{
+}
+
+/*
+ * RX queue release callback: closes the queue's descriptor when one is set
+ * and marks the queue as having none (-1). A NULL queue is ignored.
+ */
+static void
+tap_rx_queue_release(void *queue)
+{
+	struct rx_queue *rxq = queue;
+
+	if (rxq == NULL || rxq->fd <= 0)
+		return;
+
+	close(rxq->fd);
+	rxq->fd = -1;
+}
+
+/*
+ * TX queue release callback: closes the queue's descriptor when one is set
+ * and marks the queue as having none (-1). A NULL queue is ignored.
+ */
+static void
+tap_tx_queue_release(void *queue)
+{
+	struct tx_queue *txq = queue;
+
+	if (txq == NULL || txq->fd <= 0)
+		return;
+
+	close(txq->fd);
+	txq->fd = -1;
+}
+
+/*
+ * Link update callback. The link state is not queried or changed here;
+ * it always returns 0.
+ */
+static int
+tap_link_update(struct rte_eth_dev *dev __rte_unused,
+		int wait_to_complete __rte_unused)
+{
+	return 0;
+}
+
+/*
+ * Binds queue pair qid to a TAP descriptor. An fd already stored in the RX
+ * queue is preferred, then one stored in the TX queue; only when neither has
+ * one is a new descriptor allocated with tun_alloc(). The RX and TX queue of
+ * the pair end up sharing the fd and are published in dev->data.
+ *
+ * Returns the descriptor, or -1 when a new one could not be allocated.
+ */
+static int
+tap_setup_queue(struct rte_eth_dev *dev,
+		struct pmd_internals *internals,
+		uint16_t qid)
+{
+	struct rx_queue *rx = &internals->rxq[qid];
+	struct tx_queue *tx = &internals->txq[qid];
+	int fd = rx->fd;
+
+	if (fd < 0)
+		fd = tx->fd;
+
+	if (fd < 0) {
+		RTE_LOG(INFO, PMD, "Add queue to TAP %s for qid %d\n",
+			dev->data->name, qid);
+		fd = tun_alloc(dev->data->name);
+		if (fd < 0) {
+			RTE_LOG(ERR, PMD, "tun_alloc(%s) failed\n", dev->data->name);
+			return -1;
+		}
+	}
+
+	dev->data->rx_queues[qid] = rx;
+	dev->data->tx_queues[qid] = tx;
+
+	rx->fd = fd;
+	tx->fd = fd;
+
+	return fd;
+}
+
+/*
+ * RX queue setup callback: records the mempool and port id for the queue,
+ * checks that an mbuf from mp can hold a full Ethernet frame, then binds the
+ * queue to a TAP descriptor.
+ *
+ * Returns 0 on success, -ENOMEM when the mbuf data room is too small, and
+ * -1 for a bad queue id, a NULL mempool or a descriptor allocation failure.
+ */
+static int
+tap_rx_queue_setup(struct rte_eth_dev *dev,
+		   uint16_t rx_queue_id,
+		   uint16_t nb_rx_desc __rte_unused,
+		   unsigned int socket_id __rte_unused,
+		   const struct rte_eth_rxconf *rx_conf __rte_unused,
+		   struct rte_mempool *mp)
+{
+	struct pmd_internals *internals = dev->data->dev_private;
+	struct rx_queue *rxq;
+	uint16_t buf_size;
+	int fd;
+
+	if ((mp == NULL) || (rx_queue_id >= internals->nb_queues)) {
+		RTE_LOG(ERR, PMD, "nb_queues %d mp %p\n", internals->nb_queues, mp);
+		return -1;
+	}
+
+	rxq = &internals->rxq[rx_queue_id];
+	rxq->mp = mp;
+	rxq->in_port = dev->data->port_id;
+
+	/* Now get the space available for data in the mbuf */
+	buf_size = (uint16_t) (rte_pktmbuf_data_room_size(mp) -
+			       RTE_PKTMBUF_HEADROOM);
+
+	if (buf_size < ETH_FRAME_LEN) {
+		RTE_LOG(ERR, PMD,
+			"%s: %d bytes will not fit in mbuf (%d bytes)\n",
+			dev->data->name, ETH_FRAME_LEN, buf_size);
+		return -ENOMEM;
+	}
+
+	fd = tap_setup_queue(dev, internals, rx_queue_id);
+	if (fd == -1)
+		return -1;
+
+	/* remember the descriptor so stop/uninit can close it */
+	internals->fds[rx_queue_id] = fd;
+	RTE_LOG(INFO, PMD, "RX TAP device name %s, qid %d on fd %d\n",
+		dev->data->name, rx_queue_id, rxq->fd);
+
+	return 0;
+}
+
+/*
+ * TX queue setup callback: binds the queue to a TAP descriptor (shared with
+ * the RX queue of the same id) and records it in internals->fds[] so that
+ * stop/uninit can close it.
+ *
+ * Returns 0 on success, -1 for a bad queue id or when no descriptor could be
+ * obtained.
+ */
+static int
+tap_tx_queue_setup(struct rte_eth_dev *dev,
+		   uint16_t tx_queue_id,
+		   uint16_t nb_tx_desc __rte_unused,
+		   unsigned int socket_id __rte_unused,
+		   const struct rte_eth_txconf *tx_conf __rte_unused)
+{
+	struct pmd_internals *internals = dev->data->dev_private;
+	int fd;
+
+	if (tx_queue_id >= internals->nb_queues)
+		return -1;
+
+	fd = tap_setup_queue(dev, internals, tx_queue_id);
+	if (fd < 0)
+		return -1;
+
+	internals->fds[tx_queue_id] = fd;
+	RTE_LOG(INFO, PMD, "TX TAP device name %s, qid %d on fd %d\n",
+		dev->data->name, tx_queue_id, internals->txq[tx_queue_id].fd);
+
+	/* success is 0, matching tap_rx_queue_setup(); the fd is not a status */
+	return 0;
+}
+
+/* Ethdev operations table installed on every tap device at creation time */
+static const struct eth_dev_ops ops = {
+	.dev_start              = tap_dev_start,
+	.dev_stop               = tap_dev_stop,
+	.dev_close              = tap_dev_close,
+	.dev_configure          = tap_dev_configure,
+	.dev_infos_get          = tap_dev_info,
+	.rx_queue_setup         = tap_rx_queue_setup,
+	.tx_queue_setup         = tap_tx_queue_setup,
+	.rx_queue_release       = tap_rx_queue_release,
+	.tx_queue_release       = tap_tx_queue_release,
+	.link_update            = tap_link_update,
+	.stats_get              = tap_stats_get,
+	.stats_reset            = tap_stats_reset,
+};
+
+/*
+ * Values for the flag argument of rte_eth_dev_create(): keep the
+ * rte_eth_dev_data that came with the allocated port, or replace it with a
+ * privately allocated copy.
+ */
+#define RTE_USE_GLOBAL_DATA	0x0000
+#define RTE_USE_PRIVATE_DATA	0x0001
+
+/*
+ * Assigns the MAC addresses for a tap port. The host side of the interface
+ * is set (SIOCSIFHWADDR) to "Tap-" followed by the port id and numa node;
+ * the address written to addr for the DPDK side is "dnet" followed by the
+ * same two bytes, so that the two ends differ.
+ *
+ * Returns 0 on success, -1 on bad arguments or ioctl failure.
+ */
+static int
+pmd_mac_address(int fd, struct rte_eth_dev *dev, struct ether_addr *addr)
+{
+	static const char host_prefix[4] = { 'T', 'a', 'p', '-' };
+	static const char app_prefix[4] = { 'd', 'n', 'e', 't' };
+	struct ifreq ifr;
+	char *mac = ifr.ifr_hwaddr.sa_data;
+
+	if ((addr == NULL) || (dev == NULL) || (fd <= 0))
+		return -1;
+
+	memset(&ifr, 0, sizeof(ifr));
+
+	/* fetch the current hardware address so sa_family is filled in */
+	if (ioctl(fd, SIOCGIFHWADDR, &ifr) == -1) {
+		RTE_LOG(ERR, PMD, "ioctl failed (SIOCGIFHWADDR) (%s)\n",
+			ifr.ifr_name);
+		return -1;
+	}
+
+	/* Set the host based MAC address to this special MAC format */
+	memcpy(mac, host_prefix, sizeof(host_prefix));
+	mac[4] = dev->data->port_id;
+	mac[5] = dev->data->numa_node;
+	if (ioctl(fd, SIOCSIFHWADDR, &ifr) == -1) {
+		RTE_LOG(ERR, PMD, "%s: ioctl failed (SIOCSIFHWADDR) (%s)\n",
+			dev->data->name, ifr.ifr_name);
+		return -1;
+	}
+
+	/*
+	 * Set the local application MAC address, needs to be different than
+	 * the host based MAC address.
+	 */
+	memcpy(mac, app_prefix, sizeof(app_prefix));
+	mac[4] = dev->data->port_id;
+	mac[5] = dev->data->numa_node;
+	memcpy(addr, mac, ETH_ALEN);
+
+	return 0;
+}
+
+/*
+ * Allocates an ethdev port plus its driver private area.
+ *
+ * name          - name given to the new port
+ * dev_type      - device type passed to rte_eth_dev_allocate()
+ * eth_dev       - out: the allocated port
+ * dev_ops       - operations table installed on the port
+ * internals     - out: zeroed private area of internal_size bytes
+ * internal_size - size of the private area, must be non-zero
+ * flag          - RTE_USE_PRIVATE_DATA replaces the port's rte_eth_dev_data
+ *                 with a privately allocated copy; RTE_USE_GLOBAL_DATA keeps
+ *                 the one that came with the port
+ *
+ * Returns 0 on success. On failure returns -1 with everything allocated
+ * here released and the port's original data pointer restored.
+ */
+static int
+rte_eth_dev_create(const char *name, int dev_type,
+		   struct rte_eth_dev **eth_dev,
+		   const struct eth_dev_ops *dev_ops,
+		   void **internals, size_t internal_size,
+		   uint16_t flag)
+{
+	char buff[RTE_ETH_NAME_MAX_LEN];
+	int numa_node = rte_socket_id();
+	struct rte_eth_dev *dev = NULL;
+	struct rte_eth_dev_data *data = NULL;
+	struct rte_eth_dev_data *orig_data = NULL;
+	void *priv = NULL;
+
+	if ((name == NULL) || (eth_dev == NULL) || (dev_ops == NULL) ||
+	    (internals == NULL) || (internal_size == 0)) {
+		RTE_PMD_DEBUG_TRACE("Parameters are invalid\n");
+		return -1;
+	}
+
+	dev = rte_eth_dev_allocate(name, dev_type);
+	if (dev == NULL) {
+		/* nothing has been allocated yet, so there is nothing to undo */
+		RTE_PMD_DEBUG_TRACE("rte_eth_dev_allocate failed for %s\n",
+				    name);
+		return -1;
+	}
+	orig_data = dev->data;
+
+	if (flag & RTE_USE_PRIVATE_DATA) {
+		/*
+		 * now do all data allocation - for eth_dev structure, dummy
+		 * pci driver and internal (private) data
+		 */
+		snprintf(buff, sizeof(buff), "D-%s-%d", name, numa_node);
+		data = rte_zmalloc_socket(buff, sizeof(struct rte_eth_dev_data),
+					  0, numa_node);
+		if (data == NULL) {
+			RTE_PMD_DEBUG_TRACE("%s: Unable to allocate memory\n",
+					    name);
+			goto error;
+		}
+		/*
+		 * move the current state of the structure to the new one;
+		 * this carries over port_id and name as well
+		 */
+		rte_memcpy(data, dev->data, sizeof(struct rte_eth_dev_data));
+		dev->data = data;	/* Override the current data pointer */
+	} else
+		data = dev->data;
+
+	snprintf(buff, sizeof(buff), "I-%s-%d", name, numa_node);
+	priv = rte_zmalloc_socket(buff, internal_size, 0, numa_node);
+	if (priv == NULL) {
+		RTE_PMD_DEBUG_TRACE("Unable to allocate internal memory %zu\n",
+				    internal_size);
+		goto error;
+	}
+
+	/* Setup some default values */
+	dev->dev_ops = dev_ops;
+	data->dev_private = priv;
+
+	dev->driver = NULL;
+	data->dev_flags = RTE_ETH_DEV_DETACHABLE;
+	data->kdrv = RTE_KDRV_NONE;
+	data->numa_node = numa_node;
+
+	*eth_dev = dev;
+	*internals = priv;
+
+	return 0;
+error:
+	rte_free(priv);
+
+	if (flag & RTE_USE_PRIVATE_DATA)
+		rte_free(data);
+
+	/* never leave the port pointing at the copy that was just freed */
+	dev->data = orig_data;
+	rte_eth_dev_release_port(dev);
+
+	return -1;
+}
+
+/*
+ * Creates the ethdev port for a tap device, opens the first TAP descriptor,
+ * assigns the MAC addresses and fills in the port data (link, queue counts,
+ * driver name).
+ *
+ * name      - vdev name, used for logging only
+ * tap       - interface name and speed to use
+ * internals - out: driver private data of the new port
+ * eth_dev   - out: the new port
+ *
+ * Returns 0 on success. On failure returns -1 after closing the descriptor
+ * and releasing the port together with its private allocations.
+ */
+static int
+pmd_init_internals(const char *name, struct tap_info *tap,
+		   struct pmd_internals **internals,
+		   struct rte_eth_dev **eth_dev)
+{
+	struct rte_eth_dev *dev = NULL;
+	struct pmd_internals *internal = NULL;
+	struct rte_eth_dev_data *data = NULL;
+	int ret, i, fd = -1;
+
+	RTE_LOG(INFO, PMD,
+		"%s: Create TUN/TAP Ethernet device with %d queues on numa %u\n",
+		name, RTE_PMD_TAP_MAX_QUEUES, rte_socket_id());
+
+	pmd_link.link_speed = tap->speed;
+
+	ret = rte_eth_dev_create(tap->name, RTE_ETH_DEV_VIRTUAL, &dev, &ops,
+				 (void **)&internal, sizeof(struct pmd_internals),
+				 RTE_USE_PRIVATE_DATA);
+	if (ret < 0)
+		return -1;
+
+	data = dev->data;
+
+	/* snprintf guarantees termination, strncpy does not */
+	snprintf(internal->name, sizeof(internal->name), "%s", tap->name);
+
+	internal->nb_queues = RTE_PMD_TAP_MAX_QUEUES;
+
+	/* Presetup the fds to -1 as being not working */
+	for (i = 0; i < RTE_PMD_TAP_MAX_QUEUES; i++) {
+		internal->fds[i] = -1;
+		internal->rxq[i].fd = -1;
+		internal->txq[i].fd = -1;
+	}
+
+	/* Create the first Tap device */
+	fd = tun_alloc(data->name);
+	if (fd < 0) {
+		RTE_LOG(ERR, PMD, "tun_alloc(%s) failed\n", data->name);
+		goto error;
+	}
+
+	/* Take the TUN/TAP fd and place in the first location */
+	internal->rxq[0].fd = fd;
+	internal->txq[0].fd = fd;
+	internal->fds[0] = fd;
+
+	if (pmd_mac_address(fd, dev, &internal->eth_addr) < 0)
+		goto error;
+
+	data->dev_link = pmd_link;
+	data->mac_addrs = &internal->eth_addr;
+
+	data->nb_rx_queues = (uint16_t)internal->nb_queues;
+	data->nb_tx_queues = (uint16_t)internal->nb_queues;
+	data->drv_name = drivername;
+
+	*eth_dev = dev;
+	*internals = internal;
+
+	return 0;
+
+error:
+	if (fd >= 0)
+		close(fd);
+
+	/*
+	 * The port was created with RTE_USE_PRIVATE_DATA, so dev->data is a
+	 * private allocation; release it the same way pmd_tap_devuninit() does.
+	 */
+	rte_free(internal);
+	rte_free(data);
+	rte_eth_dev_release_port(dev);
+
+	return -1;
+}
+
+/*
+ * Creates one tap port and installs the RX/TX burst callbacks on it.
+ * Returns 0 on success, -1 when the port could not be initialised.
+ */
+static int
+eth_dev_tap_create(const char *name, struct tap_info *tap)
+{
+	struct rte_eth_dev *port = NULL;
+	struct pmd_internals *priv = NULL;
+	int rc;
+
+	rc = pmd_init_internals(name, tap, &priv, &port);
+	if (rc < 0)
+		return -1;
+
+	port->tx_pkt_burst = pmd_tx_burst;
+	port->rx_pkt_burst = pmd_rx_burst;
+
+	return 0;
+}
+
+/*
+ * kvargs handler for "iface": stores value as the interface name in the
+ * tap_info passed through extra_args. With a NULL value the default
+ * "dtap<N>" name is built from the most recently consumed unit number.
+ * Always returns 0.
+ */
+static int
+set_interface_name(const char *key __rte_unused,
+		   const char *value,
+		   void *extra_args)
+{
+	struct tap_info *tap = (struct tap_info *)extra_args;
+
+	if (value == NULL) {
+		snprintf(tap->name, sizeof(tap->name), "dtap%d", (tap_unit - 1));
+		return 0;
+	}
+
+	snprintf(tap->name, sizeof(tap->name), "%s", value);
+
+	return 0;
+}
+
+/*
+ * kvargs handler for "speed": parses value as a decimal link speed and
+ * stores it both in the global link template and in the tap_info passed
+ * through extra_args. A NULL value selects the 10G default.
+ *
+ * Returns 0 on success, -1 when extra_args is NULL or value is not a
+ * non-negative decimal number that fits the speed field.
+ */
+static int
+set_interface_speed(const char *key __rte_unused,
+		    const char *value,
+		    void *extra_args)
+{
+	struct tap_info *tap = (struct tap_info *)extra_args;
+	long speed = ETH_SPEED_NUM_10G;
+
+	if (tap == NULL)
+		return -1;
+
+	if (value != NULL) {
+		char *end = NULL;
+
+		/* unlike atoi(), strtol() lets trailing garbage be detected */
+		speed = strtol(value, &end, 10);
+		if ((end == value) || (*end != '\0') ||
+		    (speed < 0) || (speed > INT32_MAX)) {
+			RTE_LOG(ERR, PMD, "Invalid speed value (%s)\n", value);
+			return -1;
+		}
+	}
+
+	pmd_link.link_speed = speed;
+	tap->speed = pmd_link.link_speed;
+
+	return 0;
+}
+
+/*
+ * Open a TAP interface device.
+ *
+ * vdev init entry point. Starts from the defaults (10G, interface name
+ * "dtap<N>"), applies the optional "speed" and "iface" arguments found in
+ * params, then creates the port.
+ *
+ * Returns 0 on success and a negative value when an argument handler or the
+ * port creation fails.
+ */
+static int
+pmd_tap_devinit(const char *name, const char *params)
+{
+	int ret = 0;
+	struct rte_kvargs *kvlist = NULL;
+	struct tap_info tap_info;
+
+	/* Setup default values */
+	memset(&tap_info, 0, sizeof(tap_info));
+
+	tap_info.speed = ETH_SPEED_NUM_10G;
+	snprintf(tap_info.name, sizeof(tap_info.name), "dtap%d", tap_unit++);
+
+	if ((params == NULL) || (params[0] == '\0')) {
+		RTE_LOG(INFO, PMD, "Initializing pmd_tap for %s\n", name);
+		goto create;
+	}
+
+	RTE_LOG(INFO, PMD, "Initialize %s with params (%s)\n", name, params);
+
+	kvlist = rte_kvargs_parse(params, valid_arguments);
+	if (!kvlist)
+		goto create;	/* unparsable params: fall back to defaults */
+
+	if (rte_kvargs_count(kvlist, ETH_TAP_SPEED_ARG) == 1) {
+		ret = rte_kvargs_process(kvlist, ETH_TAP_SPEED_ARG,
+					 &set_interface_speed, &tap_info);
+		if (ret < 0)
+			goto leave;
+	} else
+		set_interface_speed(NULL, NULL, &tap_info);
+
+	if (rte_kvargs_count(kvlist, ETH_TAP_IFACE_ARG) == 1) {
+		ret = rte_kvargs_process(kvlist, ETH_TAP_IFACE_ARG,
+					 &set_interface_name, &tap_info);
+		if (ret < 0)
+			goto leave;
+	} else
+		set_interface_name(NULL, NULL, (void *)&tap_info);
+
+create:
+	/* every successful path must end up creating the port */
+	ret = eth_dev_tap_create(name, &tap_info);
+
+leave:
+	/* release the parsed arguments on the error paths as well */
+	if (kvlist != NULL)
+		rte_kvargs_free(kvlist);
+
+	if (ret < 0)
+		RTE_LOG(INFO, PMD, "Failed to create pmd_tap for %s\n", name);
+
+	return ret;
+}
+
+/*
+ * Detach a TAP device: closes every descriptor still recorded for the port,
+ * frees the private data and the port data, and releases the port. A NULL
+ * or unknown name is not an error. Always returns 0.
+ */
+static int
+pmd_tap_devuninit(const char *name)
+{
+	struct rte_eth_dev *port = NULL;
+	struct pmd_internals *priv;
+	int q;
+
+	RTE_LOG(INFO, PMD, "Closing TUN/TAP Ethernet device on numa %u\n",
+		rte_socket_id());
+
+	if (name == NULL)
+		return 0;
+
+	/* find the ethdev entry */
+	port = rte_eth_dev_allocated(name);
+	if (port == NULL)
+		return 0;
+
+	priv = port->data->dev_private;
+	for (q = 0; q < priv->nb_queues; q++) {
+		if (priv->fds[q] == -1)
+			continue;
+		close(priv->fds[q]);
+	}
+
+	/* private area first, then the data structure that points to it */
+	rte_free(priv);
+	rte_free(port->data);
+
+	rte_eth_dev_release_port(port);
+
+	return 0;
+}
+
+/* Virtual device driver descriptor: init/uninit entry points of the PMD */
+static struct rte_driver pmd_tap_drv = {
+	.type = PMD_VDEV,
+	.init = pmd_tap_devinit,
+	.uninit = pmd_tap_devuninit,
+};
+
+/* Register the driver under the vdev name "eth_tap" and list its arguments */
+PMD_REGISTER_DRIVER(pmd_tap_drv, eth_tap);
+DRIVER_REGISTER_PARAM_STRING(eth_tap,
+			     "iface=<string>,speed=N");
diff --git a/drivers/net/tap/rte_pmd_tap_version.map b/drivers/net/tap/rte_pmd_tap_version.map
new file mode 100644
index 0000000..61463bf
--- /dev/null
+++ b/drivers/net/tap/rte_pmd_tap_version.map
@@ -0,0 +1,4 @@
+DPDK_16.11 {
+
+	local: *;
+};
diff --git a/mk/rte.app.mk b/mk/rte.app.mk
index 1a0095b..bd1d10f 100644
--- a/mk/rte.app.mk
+++ b/mk/rte.app.mk
@@ -129,6 +129,7 @@ ifeq ($(CONFIG_RTE_LIBRTE_VHOST),y)
 _LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_VHOST)      += -lrte_pmd_vhost
 endif # $(CONFIG_RTE_LIBRTE_VHOST)
 _LDLIBS-$(CONFIG_RTE_LIBRTE_VMXNET3_PMD)    += -lrte_pmd_vmxnet3_uio
+_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_TAP)        += -lrte_pmd_tap
 
 ifeq ($(CONFIG_RTE_LIBRTE_CRYPTODEV),y)
 _LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_AESNI_MB)   += -lrte_pmd_aesni_mb
-- 
2.8.0.GIT

^ permalink raw reply related	[flat|nested] 59+ messages in thread

* Re: [PATCH v2] drivers/net:new PMD using tun/tap host interface
  2016-09-21  1:32     ` Wiles, Keith
@ 2016-09-21  2:13       ` Yuanhan Liu
  2016-09-21  8:24         ` Thomas Monjalon
  0 siblings, 1 reply; 59+ messages in thread
From: Yuanhan Liu @ 2016-09-21  2:13 UTC (permalink / raw)
  To: Wiles, Keith; +Cc: dev, pmatilai

On Wed, Sep 21, 2016 at 01:32:15AM +0000, Wiles, Keith wrote:
> The concern I am having is on my standard Ubuntu 16.04 system these errors do not appear.

Yes, that's really weird. To me, it's such a solid error that could be
triggered on all linux platforms.

Anyway, I saw you removed the <net/if.h> including in v3, which makes
my robot happy :)

> I would like to understand why they appeared on your system. The ifdef __linux__ must be enabled as the defines in the linux/if_tun.h file do not give an error. I did play with the headers already and removed the ifdef, but as I could not reproduce your build failure it did not trigger anything new.
> 
> I will look at it some more, but it does not make sense and I do want to make sure it works.
> 
> > 
> >> +#include <linux/if.h>
> >> +#include <linux/if_tun.h>
> >> +#include <linux/if_ether.h>
> >> +#else
> >> +#include <netinet/if_ether.h>
> >> +#endif
> >> +#include <fcntl.h>
> >> +
> >> +#include <poll.h>
> >> +
> >> +/* Linux based path to the TUN device */
> >> +#define TUN_TAP_DEV_PATH        "/dev/net/tun"
> > 
> > However, you hardcoded a linux only path here. While checking the code
> > from qemu, I saw that the path is actually different from different UNIX
> > variants, even for FreeBSD and NetBSD.
> 
> I only assumed this to work for Linux and not FreeBSD/NetBSD as the handling of the opens on the tun device are different then linux. I also only added it to the common_linux configuration file.

I didn't notice the common_linux file, but I have guessed so (that you
meant to enable linux only): the "ifdef __linux" stuff just confuses me
a bit.

	--yliu
> 
> > 
> >    [yliu@yliu-dev ~/qemu]$ grep -E "/dev/.*(tap|tun)" net/tap*.c
> >    net/tap-bsd.c:            snprintf(dname, sizeof dname, "/dev/tun%d", i);
> >    net/tap-bsd.c:            snprintf(dname, sizeof dname, "/dev/tap%d", i);
> >    net/tap-bsd.c:#define PATH_NET_TAP "/dev/tap"
> >    net/tap-linux.c:#define PATH_NET_TUN "/dev/net/tun"
> >    net/tap-solaris.c:    TFR(tap_fd = open("/dev/tap", O_RDWR, 0));
> >    ...
> > 
> > 	--yliu

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [PATCH v2] drivers/net:new PMD using tun/tap host interface
  2016-09-21  2:13       ` Yuanhan Liu
@ 2016-09-21  8:24         ` Thomas Monjalon
  2016-09-21 23:55           ` Wiles, Keith
  0 siblings, 1 reply; 59+ messages in thread
From: Thomas Monjalon @ 2016-09-21  8:24 UTC (permalink / raw)
  To: Wiles, Keith; +Cc: dev, Yuanhan Liu, pmatilai

2016-09-21 10:13, Yuanhan Liu:
> On Wed, Sep 21, 2016 at 01:32:15AM +0000, Wiles, Keith wrote:
> > I only assumed this to work for Linux and not FreeBSD/NetBSD as the handling of the opens on the tun device are different then linux. I also only added it to the common_linux configuration file.
> 
> I didn't notice the common_linux file, but I have guessed so (that you
> meant to enable linux only): the "ifdef __linux" stuff just confuses me
> a bit.

By the way, we have a macro for Linux environment: RTE_EXEC_ENV_LINUXAPP
Please prefer it over __linux

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [PATCH v2] drivers/net:new PMD using tun/tap host interface
  2016-09-21  8:24         ` Thomas Monjalon
@ 2016-09-21 23:55           ` Wiles, Keith
  0 siblings, 0 replies; 59+ messages in thread
From: Wiles, Keith @ 2016-09-21 23:55 UTC (permalink / raw)
  To: Thomas Monjalon; +Cc: <dev@dpdk.org>, Yuanhan Liu, pmatilai


Regards,
Keith

> On Sep 21, 2016, at 4:24 AM, Thomas Monjalon <thomas.monjalon@6wind.com> wrote:
> 
> 2016-09-21 10:13, Yuanhan Liu:
>> On Wed, Sep 21, 2016 at 01:32:15AM +0000, Wiles, Keith wrote:
>>> I only assumed this to work for Linux and not FreeBSD/NetBSD as the handling of the opens on the tun device are different then linux. I also only added it to the common_linux configuration file.
>> 
>> I didn't notice the common_linux file, but I have guessed so (that you
>> meant to enable linux only): the "ifdef __linux" stuff just confuses me
>> a bit.
> 
> By the way, we have a macro for Linux environment: RTE_EXEC_ENV_LINUXAPP
> Please prefer it over __linux

I removed the __linux__ ifdef in v3, BTW I think we need to grep the code for the __linux__ as I started with one of the other drivers. I will try to remember to grep the drivers when I am back home.

^ permalink raw reply	[flat|nested] 59+ messages in thread

* [PATCH v4] drivers/net:new PMD using tun/tap host interface
  2016-09-21  2:00   ` [PATCH v3] drivers/net:new PMD using tun/tap host interface Keith Wiles
@ 2016-10-04 14:45     ` Keith Wiles
  2016-10-11  9:40       ` Ferruh Yigit
                         ` (4 more replies)
  2016-10-12 20:54     ` [PATCH v6] " Keith Wiles
                       ` (3 subsequent siblings)
  4 siblings, 5 replies; 59+ messages in thread
From: Keith Wiles @ 2016-10-04 14:45 UTC (permalink / raw)
  To: dev; +Cc: pmatilai, yuanhan.liu

The rte_eth_tap.c PMD creates a device using TUN/TAP interfaces
on the local host. The PMD allows for DPDK and the host to
communicate using a raw device interface on the host and in
the DPDK application. The device created is a Tap device with
a L2 packet header.

v4 - merge with latest driver changes
v3 - fix includes by removing ifdef for other type besides Linux.
     Fix the copyright notice in the Makefile
v2 - merge all of the patches into one patch.
     Fix a typo on naming the tap device.
     Update the maintainers list

Signed-off-by: Keith Wiles <keith.wiles@intel.com>
---
 MAINTAINERS                             |   5 +
 config/common_linuxapp                  |   2 +
 doc/guides/nics/tap.rst                 |  84 ++++
 drivers/net/Makefile                    |   1 +
 drivers/net/tap/Makefile                |  57 +++
 drivers/net/tap/rte_eth_tap.c           | 866 ++++++++++++++++++++++++++++++++
 drivers/net/tap/rte_pmd_tap_version.map |   4 +
 mk/rte.app.mk                           |   1 +
 8 files changed, 1020 insertions(+)
 create mode 100644 doc/guides/nics/tap.rst
 create mode 100644 drivers/net/tap/Makefile
 create mode 100644 drivers/net/tap/rte_eth_tap.c
 create mode 100644 drivers/net/tap/rte_pmd_tap_version.map

diff --git a/MAINTAINERS b/MAINTAINERS
index 7c33ad4..fad74e4 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -392,6 +392,11 @@ F: doc/guides/nics/pcap_ring.rst
 F: app/test/test_pmd_ring.c
 F: app/test/test_pmd_ring_perf.c
 
+Tap PMD
+M: Keith Wiles <keith.wiles@intel.com>
+F: drivers/net/tap
+F: doc/guides/nics/tap.rst
+
 Null Networking PMD
 M: Tetsuya Mukawa <mtetsuyah@gmail.com>
 F: drivers/net/null/
diff --git a/config/common_linuxapp b/config/common_linuxapp
index 2483dfa..59a2053 100644
--- a/config/common_linuxapp
+++ b/config/common_linuxapp
@@ -44,3 +44,5 @@ CONFIG_RTE_LIBRTE_PMD_VHOST=y
 CONFIG_RTE_LIBRTE_PMD_AF_PACKET=y
 CONFIG_RTE_LIBRTE_POWER=y
 CONFIG_RTE_VIRTIO_USER=y
+CONFIG_RTE_LIBRTE_PMD_TAP=y
+CONFIG_RTE_PMD_TAP_MAX_QUEUES=32
diff --git a/doc/guides/nics/tap.rst b/doc/guides/nics/tap.rst
new file mode 100644
index 0000000..072def8
--- /dev/null
+++ b/doc/guides/nics/tap.rst
@@ -0,0 +1,84 @@
+..  BSD LICENSE
+    Copyright(c) 2016 Intel Corporation. All rights reserved.
+    All rights reserved.
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions
+    are met:
+
+    * Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+    notice, this list of conditions and the following disclaimer in
+    the documentation and/or other materials provided with the
+    distribution.
+    * Neither the name of Intel Corporation nor the names of its
+    contributors may be used to endorse or promote products derived
+    from this software without specific prior written permission.
+
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+Tun/Tap Poll Mode Driver
+========================================
+
+The rte_eth_tap.c PMD creates a device using TUN/TAP interfaces on the local
+host. The PMD allows for DPDK and the host to communicate using a raw device
+interface on the host and in the DPDK application.
+
+The device created is a TAP device, which sends/receives packets in a raw format
+with an L2 header. The TAP PMD is used for connectivity to the local host
+through a TAP interface. When the TAP PMD is initialized it will create a number
+of tap devices in the host, visible via the 'ifconfig -a' or 'ip' commands. These
+commands can be used to configure and query the virtual device.
+
+These TAP interfaces can be used with wireshark, tcpdump or Pktgen-DPDK, and
+can also serve as a network connection to the DPDK application. To enable one
+or more interfaces, use the --vdev=eth_tap option on the DPDK application
+command line. Each --vdev=eth_tap option given will create an interface named
+dtap0, dtap1, ... and so forth.
+
+.. code-block:: console
+
+   The interface name can be changed by adding the iface=foo0 argument
+   e.g. --vdev=eth_tap,iface=foo0 --vdev=eth_tap,iface=foo1, ...
+
+.. code-block:: console
+
+   Also the speed of the interface can be changed from 10G to whatever number
+   needed, but the interface does not enforce that speed.
+   e.g. --vdev=eth_tap,iface=foo0,speed=25000
+
+After the DPDK application is started you can send and receive packets on the
+interface using the standard rx_burst/tx_burst APIs in DPDK. From the host point
+of view you can use any host tool like tcpdump, wireshark, ping, Pktgen and
+others to communicate with the DPDK application. The DPDK application may not
+understand network protocols like IPv4/6, UDP or TCP unless the application has
+been written to understand these protocols.
+
+If you need the interface to act as a real network interface, i.e. to be up and
+to have a valid IP address, you can do this with the following commands:
+
+.. code-block:: console
+
+   sudo ip link set dtap0 up; sudo ip addr add 192.168.0.250/24 dev dtap0
+   sudo ip link set dtap1 up; sudo ip addr add 192.168.1.250/24 dev dtap1
+
+Please change the IP addresses as you see fit.
+
+If routing is enabled on the host you can also communicate with the DPDK App
+over the internet via a standard socket layer application as long as you account
+for the protocol handling in the application.
+
+If you have a Network Stack in your DPDK application or something like it you
+can utilize that stack to handle the network protocols. Plus you would be able
+to address the interface using an IP address assigned to the internal interface.
diff --git a/drivers/net/Makefile b/drivers/net/Makefile
index bc93230..b4afa98 100644
--- a/drivers/net/Makefile
+++ b/drivers/net/Makefile
@@ -55,6 +55,7 @@ DIRS-$(CONFIG_RTE_LIBRTE_THUNDERX_NICVF_PMD) += thunderx
 DIRS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio
 DIRS-$(CONFIG_RTE_LIBRTE_VMXNET3_PMD) += vmxnet3
 DIRS-$(CONFIG_RTE_LIBRTE_PMD_XENVIRT) += xenvirt
+DIRS-$(CONFIG_RTE_LIBRTE_PMD_TAP) += tap
 
 ifeq ($(CONFIG_RTE_LIBRTE_VHOST),y)
 DIRS-$(CONFIG_RTE_LIBRTE_PMD_VHOST) += vhost
diff --git a/drivers/net/tap/Makefile b/drivers/net/tap/Makefile
new file mode 100644
index 0000000..e18f30c
--- /dev/null
+++ b/drivers/net/tap/Makefile
@@ -0,0 +1,57 @@
+#   BSD LICENSE
+#
+#   Copyright(c) 2016 Intel Corporation. All rights reserved.
+#
+#   Redistribution and use in source and binary forms, with or without
+#   modification, are permitted provided that the following conditions
+#   are met:
+#
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice, this list of conditions and the following disclaimer in
+#       the documentation and/or other materials provided with the
+#       distribution.
+#     * Neither the name of Intel Corporation nor the names of its
+#       contributors may be used to endorse or promote products derived
+#       from this software without specific prior written permission.
+#
+#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+#   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+#   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+#   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+#   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+#
+# library name
+#
+LIB = librte_pmd_tap.a
+
+EXPORT_MAP := rte_pmd_tap_version.map
+
+LIBABIVER := 1
+
+CFLAGS += -O3
+CFLAGS += $(WERROR_FLAGS)
+
+#
+# all source are stored in SRCS-y
+#
+SRCS-$(CONFIG_RTE_LIBRTE_PMD_TAP) += rte_eth_tap.c
+
+# this lib depends upon:
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_TAP) += lib/librte_eal
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_TAP) += lib/librte_mbuf
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_TAP) += lib/librte_mempool
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_TAP) += lib/librte_ether
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_TAP) += lib/librte_kvargs
+
+include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/drivers/net/tap/rte_eth_tap.c b/drivers/net/tap/rte_eth_tap.c
new file mode 100644
index 0000000..680edd1
--- /dev/null
+++ b/drivers/net/tap/rte_eth_tap.c
@@ -0,0 +1,866 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2016 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <rte_mbuf.h>
+#include <rte_ethdev.h>
+#include <rte_malloc.h>
+#include <rte_vdev.h>
+#include <rte_kvargs.h>
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/socket.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <unistd.h>
+#include <poll.h>
+#include <arpa/inet.h>
+#include <linux/if.h>
+#include <linux/if_tun.h>
+#include <linux/if_ether.h>
+#include <fcntl.h>
+
+#include <poll.h>
+
+/* Linux based path to the TUN device */
+#define TUN_TAP_DEV_PATH        "/dev/net/tun"
+
+#define ETH_TAP_IFACE_ARG       "iface"
+#define ETH_TAP_SPEED_ARG       "speed"
+
+static const char *valid_arguments[] = {
+	ETH_TAP_IFACE_ARG,
+	ETH_TAP_SPEED_ARG,
+	NULL
+};
+
+static const char *drivername = "Tap PMD";
+static int tap_unit = 0;
+
+static struct rte_eth_link pmd_link = {
+	.link_speed = ETH_SPEED_NUM_10G,
+	.link_duplex = ETH_LINK_FULL_DUPLEX,
+	.link_status = ETH_LINK_DOWN,
+	.link_autoneg = ETH_LINK_SPEED_AUTONEG
+};
+
+struct tap_info {
+	char name[RTE_ETH_NAME_MAX_LEN]; /* Interface name supplied/given */
+	int speed;			 /* Speed of interface */
+};
+
+struct pkt_stats {
+	uint64_t opackets;		/* Number of output packets */
+	uint64_t ipackets;		/* Number of input packets */
+	uint64_t obytes;		/* Number of bytes on output */
+	uint64_t ibytes;		/* Number of bytes on input */
+	uint64_t errs;			/* Number of error packets */
+};
+
+struct rx_queue {
+	struct rte_mempool *mp;		/* Mempool for RX packets */
+	uint16_t in_port;		/* Port ID */
+	int fd;
+
+	struct pkt_stats stats;		/* Stats for this RX queue */
+};
+
+struct tx_queue {
+	int fd;
+	struct pkt_stats stats;		/* Stats for this TX queue */
+};
+
+struct pmd_internals {
+	char name[RTE_ETH_NAME_MAX_LEN];	/* Internal Tap device name */
+	uint16_t nb_queues;			/* Number of queues supported */
+	uint16_t pad0;
+	struct ether_addr eth_addr;	/* Mac address of the device port */
+
+	int if_index;			/* IF_INDEX for the port */
+	int fds[RTE_PMD_TAP_MAX_QUEUES]; /* List of all file descriptors */
+
+	struct rx_queue rxq[RTE_PMD_TAP_MAX_QUEUES];	/* List of RX queues */
+	struct tx_queue txq[RTE_PMD_TAP_MAX_QUEUES];	/* List of TX queues */
+};
+
+/*
+ * Tun/Tap allocation routine
+ *
+ * Opens the TUN/TAP clone device, attaches the descriptor to a TAP interface
+ * (L2 frames, no packet-info header) and puts it in non-blocking mode.
+ *
+ * name is the name of the interface to use; pass NULL or an empty string to
+ * take the host supplied name. When name is not NULL and the name reported
+ * back by TUNSETIFF differs, it is copied into name (up to IFNAMSIZ bytes
+ * including the terminator).
+ *
+ * Returns the open file descriptor, or -1 on any failure.
+ */
+static int
+tun_alloc(char *name)
+{
+	struct ifreq ifr;
+	unsigned int features;
+	int fd, flags;
+
+	memset(&ifr, 0, sizeof(struct ifreq));
+
+	ifr.ifr_flags = IFF_TAP | IFF_NO_PI;
+	/* snprintf() always NUL-terminates; strncpy() does not on a long name */
+	if (name && name[0])
+		snprintf(ifr.ifr_name, IFNAMSIZ, "%s", name);
+
+	fd = open(TUN_TAP_DEV_PATH, O_RDWR);
+	if (fd < 0) {
+		RTE_LOG(ERR, PMD, "Unable to create TAP interface\n");
+		goto error;
+	}
+
+	/* Grab the TUN features to verify we can work */
+	if (ioctl(fd, TUNGETFEATURES, &features) < 0) {
+		RTE_LOG(ERR, PMD, "Unable to get TUN/TAP features\n");
+		goto error;
+	}
+	RTE_LOG(DEBUG, PMD, "TUN/TAP Features %08x\n", features);
+
+	if (!(features & IFF_MULTI_QUEUE) && (RTE_PMD_TAP_MAX_QUEUES > 1)) {
+		/* fatal for this device, so report it as an error */
+		RTE_LOG(ERR, PMD, "TUN/TAP device only one queue\n");
+		goto error;
+	} else if ((features & IFF_ONE_QUEUE) && (RTE_PMD_TAP_MAX_QUEUES == 1)) {
+		ifr.ifr_flags |= IFF_ONE_QUEUE;
+		RTE_LOG(DEBUG, PMD, "Single queue only support\n");
+	} else {
+		ifr.ifr_flags |= IFF_MULTI_QUEUE;
+		RTE_LOG(DEBUG, PMD, "Multi-queue support for %d queues\n",
+			RTE_PMD_TAP_MAX_QUEUES);
+	}
+
+	/* Set the TUN/TAP configuration and get the name if needed */
+	if (ioctl(fd, TUNSETIFF, (void *)&ifr) < 0) {
+		RTE_LOG(ERR, PMD, "Unable to set TUNSETIFF for %s\n", ifr.ifr_name);
+		perror("TUNSETIFF");
+		goto error;
+	}
+
+	/* Always set the file descriptor to non-blocking, keeping other flags */
+	flags = fcntl(fd, F_GETFL);
+	if ((flags < 0) || (fcntl(fd, F_SETFL, flags | O_NONBLOCK) < 0)) {
+		RTE_LOG(ERR, PMD, "Unable to set to nonblocking\n");
+		perror("F_SETFL, NONBLOCK");
+		goto error;
+	}
+
+	/* If the assigned name is different, hand the new name back */
+	if (name && strcmp(name, ifr.ifr_name))
+		snprintf(name, IFNAMSIZ, "%s", ifr.ifr_name);
+
+	return fd;
+
+error:
+	/* 0 is a valid descriptor; only a negative fd means "never opened" */
+	if (fd >= 0)
+		close(fd);
+	return -1;
+}
+
+/*
+ * Callback to handle the rx burst of packets to the correct interface and file
+ * descriptor(s) in a multi-queue setup.
+ *
+ * Reads up to nb_pkts frames from the queue's descriptor, one read() per
+ * freshly allocated mbuf, and stores the mbufs in bufs[]. The loop ends early
+ * when poll() reports nothing ready, on POLLERR, when no mbuf can be
+ * allocated, or when read() returns <= 0.
+ *
+ * Returns the number of mbufs stored in bufs[]; the queue's ipackets and
+ * ibytes counters are advanced by the same packet and byte counts.
+ */
+static uint16_t
+pmd_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
+{
+	int len, n;
+	struct rte_mbuf *mbuf;
+	struct rx_queue *rxq = queue;
+	struct pollfd pfd;
+	uint16_t num_rx;
+	unsigned long num_rx_bytes = 0;
+
+	pfd.events = POLLIN;
+	pfd.fd = rxq->fd;
+	for (num_rx = 0; num_rx < nb_pkts; ) {
+		/* zero timeout: poll() only reports the current state */
+		n = poll(&pfd, 1, 0);
+
+		if (n <= 0)
+			break;
+
+		if (pfd.revents == 0)
+			continue;
+
+		if (pfd.revents & POLLERR) {
+			rxq->stats.errs++;
+			RTE_LOG(ERR, PMD, "Packet Error\n");
+			break;
+		}
+		/* POLLHUP is only logged; the read below is still attempted */
+		if (pfd.revents & POLLHUP)
+			RTE_LOG(ERR, PMD, "Peer closed connection\n");
+
+		/* allocate the next mbuf */
+		mbuf = rte_pktmbuf_alloc(rxq->mp);
+		if (unlikely(mbuf == NULL)) {
+			RTE_LOG(ERR, PMD, "Unable to allocate mbuf\n");
+			break;
+		}
+
+		/* read straight into the mbuf data area, bounded by its tailroom */
+		len = read(pfd.fd, rte_pktmbuf_mtod(mbuf, char *),
+			   rte_pktmbuf_tailroom(mbuf));
+		if (len <= 0) {
+			RTE_LOG(ERR, PMD, "len %d\n", len);
+			rte_pktmbuf_free(mbuf);
+			break;
+		}
+
+		mbuf->data_len = len;
+		mbuf->pkt_len = len;
+		mbuf->port = rxq->in_port;
+
+		/* account for the receive frame */
+		bufs[num_rx++] = mbuf;
+		num_rx_bytes += mbuf->pkt_len;
+	}
+	rxq->stats.ipackets += num_rx;
+	rxq->stats.ibytes += num_rx_bytes;
+
+	return num_rx;
+}
+
+/*
+ * Callback to handle sending packets from the tap interface
+ *
+ * Writes the packets in bufs[] to the queue's descriptor, one write() per
+ * packet, and frees each mbuf once it has been written. Sending stops at the
+ * first poll() or write() that returns <= 0. Mbufs that were not written are
+ * not freed here; their count is added to the queue's errs counter.
+ *
+ * NOTE(review): write() is given the first segment's data pointer together
+ * with the full pkt_len, which looks like it assumes single-segment mbufs -
+ * confirm that chained mbufs are never passed in.
+ *
+ * Returns the number of packets written.
+ */
+static uint16_t
+pmd_tx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
+{
+	struct rte_mbuf *mbuf;
+	struct tx_queue *txq = queue;
+	struct pollfd pfd;
+	uint16_t num_tx = 0;
+	unsigned long num_tx_bytes = 0;
+	int i, n;
+
+	if (unlikely(nb_pkts == 0))
+		return 0;
+
+	pfd.events = POLLOUT;
+	pfd.fd = txq->fd;
+	/* i bounds the number of poll() attempts; num_tx indexes bufs[] */
+	for (i = 0; i < nb_pkts; i++) {
+		n = poll(&pfd, 1, 0);
+
+		if (n <= 0)
+			break;
+
+		if (pfd.revents & POLLOUT) {
+			/* copy the tx frame data */
+			mbuf = bufs[num_tx];
+			n = write(pfd.fd, rte_pktmbuf_mtod(mbuf, void*),
+				  rte_pktmbuf_pkt_len(mbuf));
+			if (n <= 0)
+				break;
+
+			num_tx++;
+			num_tx_bytes += mbuf->pkt_len;
+			rte_pktmbuf_free(mbuf);
+		}
+	}
+
+	txq->stats.opackets += num_tx;
+	txq->stats.errs += nb_pkts - num_tx;
+	txq->stats.obytes += num_tx_bytes;
+
+	return num_tx;
+}
+
+/*
+ * Start callback: only marks the link as up; always returns 0.
+ */
+static int
+tap_dev_start(struct rte_eth_dev *dev)
+{
+	/* Force the Link up */
+	dev->data->dev_link.link_status = ETH_LINK_UP;
+
+	return 0;
+}
+
+/*
+ * This function gets called when the current port gets stopped.
+ *
+ * Closes every descriptor recorded in fds[] and marks the link down. Each
+ * closed descriptor is also reset to -1 in fds[] and in the matching RX/TX
+ * queue, so it cannot be closed a second time later on and the burst
+ * callbacks never poll a descriptor number the process may have reused.
+ */
+static void
+tap_dev_stop(struct rte_eth_dev *dev)
+{
+	int i;
+	struct pmd_internals *internals = dev->data->dev_private;
+
+	for (i = 0; i < internals->nb_queues; i++) {
+		if (internals->fds[i] == -1)
+			continue;
+
+		close(internals->fds[i]);
+
+		/* forget the descriptor everywhere it was stored */
+		internals->fds[i] = -1;
+		internals->rxq[i].fd = -1;
+		internals->txq[i].fd = -1;
+	}
+
+	dev->data->dev_link.link_status = ETH_LINK_DOWN;
+}
+
+/*
+ * Configure callback: nothing is configured here; always returns 0.
+ */
+static int
+tap_dev_configure(struct rte_eth_dev *dev __rte_unused)
+{
+	return 0;
+}
+
+/*
+ * Fill dev_info with the fixed capabilities of a TAP port: one MAC address,
+ * VLAN-sized maximum RX frame and as many RX/TX queues as nb_queues.
+ *
+ * NOTE(review): internals->if_index is not assigned anywhere in the visible
+ * code, so the reported if_index is presumably still 0 - confirm.
+ */
+static void
+tap_dev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
+{
+	struct pmd_internals *internals = dev->data->dev_private;
+
+	dev_info->driver_name = drivername;
+	dev_info->if_index = internals->if_index;
+	dev_info->max_mac_addrs = 1;
+	dev_info->max_rx_pktlen = (uint32_t)ETHER_MAX_VLAN_FRAME_LEN;
+	dev_info->max_rx_queues = (uint16_t)internals->nb_queues;
+	dev_info->max_tx_queues = (uint16_t)internals->nb_queues;
+	dev_info->min_rx_bufsize = 0;
+	dev_info->pci_dev = NULL;
+}
+
+/*
+ * Collect the port statistics from the per-queue software counters.
+ *
+ * The port totals are summed over all nb_queues queues. The per-queue arrays
+ * in igb_stats only have RTE_ETHDEV_QUEUE_STAT_CNTRS slots, so only the
+ * queues that fit are reported individually. RX errors counted by the RX
+ * burst path are reported in ierrors.
+ */
+static void
+tap_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *igb_stats)
+{
+	unsigned i;
+	uint64_t rx_total = 0, rx_bytes_total = 0, rx_err_total = 0;
+	uint64_t tx_total = 0, tx_bytes_total = 0, tx_err_total = 0;
+	const struct pmd_internals *internal = dev->data->dev_private;
+
+	for (i = 0; i < internal->nb_queues; i++) {
+		const struct pkt_stats *rx = &internal->rxq[i].stats;
+		const struct pkt_stats *tx = &internal->txq[i].stats;
+
+		/* per-queue slots are limited, the totals below are not */
+		if (i < RTE_ETHDEV_QUEUE_STAT_CNTRS) {
+			igb_stats->q_ipackets[i] = rx->ipackets;
+			igb_stats->q_ibytes[i] = rx->ibytes;
+			igb_stats->q_opackets[i] = tx->opackets;
+			igb_stats->q_obytes[i] = tx->obytes;
+			igb_stats->q_errors[i] = tx->errs;
+		}
+
+		rx_total += rx->ipackets;
+		rx_bytes_total += rx->ibytes;
+		rx_err_total += rx->errs;
+		tx_total += tx->opackets;
+		tx_bytes_total += tx->obytes;
+		tx_err_total += tx->errs;
+	}
+
+	igb_stats->ipackets = rx_total;
+	igb_stats->ibytes = rx_bytes_total;
+	igb_stats->ierrors = rx_err_total;
+	igb_stats->opackets = tx_total;
+	igb_stats->oerrors = tx_err_total;
+	igb_stats->obytes = tx_bytes_total;
+}
+
+/*
+ * Clear every software counter of every RX and TX queue, including the RX
+ * error counter that the RX burst path increments on POLLERR.
+ */
+static void
+tap_stats_reset(struct rte_eth_dev *dev)
+{
+	int i;
+	struct pmd_internals *internal = dev->data->dev_private;
+
+	for (i = 0; i < internal->nb_queues; i++) {
+		/* wipe the whole struct so no counter can be forgotten */
+		memset(&internal->rxq[i].stats, 0,
+		       sizeof(internal->rxq[i].stats));
+		memset(&internal->txq[i].stats, 0,
+		       sizeof(internal->txq[i].stats));
+	}
+}
+
+/*
+ * Close callback: intentionally empty, nothing is released here.
+ */
+static void
+tap_dev_close(struct rte_eth_dev *dev __rte_unused)
+{
+}
+
+/*
+ * Release an RX queue: close its descriptor, if it has one, and mark the
+ * queue as having none. A NULL queue is ignored.
+ *
+ * NOTE(review): tap_setup_queue() stores the same descriptor in the RX and TX
+ * queue of a pair, so the TX side presumably still holds this fd after it is
+ * closed here - confirm how the pair is meant to be released.
+ */
+static void
+tap_rx_queue_release(void *queue)
+{
+	struct rx_queue *rxq = queue;
+
+	/* 0 is a valid descriptor; "no descriptor" is stored as -1 */
+	if (rxq && (rxq->fd >= 0)) {
+		close(rxq->fd);
+		rxq->fd = -1;
+	}
+}
+
+/*
+ * Release a TX queue: close its descriptor, if it has one, and mark the
+ * queue as having none. A NULL queue is ignored.
+ *
+ * NOTE(review): tap_setup_queue() stores the same descriptor in the RX and TX
+ * queue of a pair, so the RX side presumably still holds this fd after it is
+ * closed here - confirm how the pair is meant to be released.
+ */
+static void
+tap_tx_queue_release(void *queue)
+{
+	struct tx_queue *txq = queue;
+
+	/* 0 is a valid descriptor; "no descriptor" is stored as -1 */
+	if (txq && (txq->fd >= 0)) {
+		close(txq->fd);
+		txq->fd = -1;
+	}
+}
+
+/*
+ * Link update callback: does nothing and returns 0. In the visible code the
+ * link status is only changed by tap_dev_start() and tap_dev_stop().
+ */
+static int
+tap_link_update(struct rte_eth_dev *dev __rte_unused,
+		int wait_to_complete __rte_unused)
+{
+	return 0;
+}
+
+/*
+ * Bind the RX/TX queue pair qid to a TAP descriptor.
+ *
+ * The descriptor already held by the RX queue is reused if there is one,
+ * otherwise the one held by the TX queue; only when neither has one is a new
+ * descriptor opened with tun_alloc(). Both queues end up sharing the
+ * descriptor and are published in the ethdev queue arrays.
+ *
+ * Returns the descriptor, or -1 when tun_alloc() fails.
+ */
+static int
+tap_setup_queue(struct rte_eth_dev *dev,
+		struct pmd_internals *internals,
+		uint16_t qid)
+{
+	struct rx_queue *rx = &internals->rxq[qid];
+	struct tx_queue *tx = &internals->txq[qid];
+	int fd = rx->fd;
+
+	/* fall back to the TX side when the RX side has no descriptor */
+	if (fd < 0)
+		fd = tx->fd;
+
+	if (fd < 0) {
+		RTE_LOG(INFO, PMD, "Add queue to TAP %s for qid %d\n",
+			dev->data->name, qid);
+		fd = tun_alloc(dev->data->name);
+		if (fd < 0) {
+			RTE_LOG(ERR, PMD, "tun_alloc(%s) failed\n", dev->data->name);
+			return -1;
+		}
+	}
+
+	dev->data->rx_queues[qid] = rx;
+	dev->data->tx_queues[qid] = tx;
+
+	rx->fd = fd;
+	tx->fd = fd;
+
+	return fd;
+}
+
+/*
+ * RX queue setup callback.
+ *
+ * Records the mempool and port id for the queue, checks that an mbuf from mp
+ * can hold ETH_FRAME_LEN bytes after the headroom, and binds the queue to a
+ * TAP descriptor through tap_setup_queue().
+ *
+ * Returns 0 on success, -ENOMEM when the mbufs are too small and -1 for a bad
+ * queue id, a NULL mempool or a descriptor failure.
+ */
+static int
+tap_rx_queue_setup(struct rte_eth_dev *dev,
+		   uint16_t rx_queue_id,
+		   uint16_t nb_rx_desc __rte_unused,
+		   unsigned int socket_id __rte_unused,
+		   const struct rte_eth_rxconf *rx_conf __rte_unused,
+		   struct rte_mempool *mp)
+{
+	struct pmd_internals *pmd = dev->data->dev_private;
+	struct rx_queue *rxq;
+	uint16_t room;
+	int tap_fd;
+
+	if ((rx_queue_id >= pmd->nb_queues) || (mp == NULL)) {
+		RTE_LOG(ERR, PMD, "nb_queues %d mp %p\n", pmd->nb_queues, mp);
+		return -1;
+	}
+
+	rxq = &pmd->rxq[rx_queue_id];
+	rxq->mp = mp;
+	rxq->in_port = dev->data->port_id;
+
+	/* Space left for packet data once the headroom is taken out */
+	room = (uint16_t) (rte_pktmbuf_data_room_size(mp) -
+			   RTE_PKTMBUF_HEADROOM);
+	if (room < ETH_FRAME_LEN) {
+		RTE_LOG(ERR, PMD,
+			"%s: %d bytes will not fit in mbuf (%d bytes)\n",
+			dev->data->name, ETH_FRAME_LEN, room);
+		return -ENOMEM;
+	}
+
+	tap_fd = tap_setup_queue(dev, pmd, rx_queue_id);
+	if (tap_fd == -1)
+		return -1;
+
+	pmd->fds[rx_queue_id] = tap_fd;
+	RTE_LOG(INFO, PMD, "RX TAP device name %s, qid %d on fd %d\n",
+		dev->data->name, rx_queue_id, rxq->fd);
+
+	return 0;
+}
+
+/*
+ * TX queue setup callback.
+ *
+ * Binds the queue to a TAP descriptor through tap_setup_queue() and records
+ * the descriptor in fds[] so that it is closed together with the others.
+ *
+ * Returns 0 on success (not the descriptor number: callers of the ethdev
+ * queue setup API treat any non-zero value as a failure) and -1 for a bad
+ * queue id or a descriptor failure.
+ */
+static int
+tap_tx_queue_setup(struct rte_eth_dev *dev,
+		   uint16_t tx_queue_id,
+		   uint16_t nb_tx_desc __rte_unused,
+		   unsigned int socket_id __rte_unused,
+		   const struct rte_eth_txconf *tx_conf __rte_unused)
+{
+	struct pmd_internals *internals = dev->data->dev_private;
+	int fd;
+
+	if (tx_queue_id >= internals->nb_queues)
+		return -1;
+
+	fd = tap_setup_queue(dev, internals, tx_queue_id);
+	if (fd < 0)
+		return -1;
+
+	internals->fds[tx_queue_id] = fd;
+	RTE_LOG(INFO, PMD, "TX TAP device name %s, qid %d on fd %d\n",
+		dev->data->name, tx_queue_id, internals->txq[tx_queue_id].fd);
+
+	return 0;
+}
+
+static const struct eth_dev_ops ops = {
+	.dev_start              = tap_dev_start,
+	.dev_stop               = tap_dev_stop,
+	.dev_close              = tap_dev_close,
+	.dev_configure          = tap_dev_configure,
+	.dev_infos_get          = tap_dev_info,
+	.rx_queue_setup         = tap_rx_queue_setup,
+	.tx_queue_setup         = tap_tx_queue_setup,
+	.rx_queue_release       = tap_rx_queue_release,
+	.tx_queue_release       = tap_tx_queue_release,
+	.link_update            = tap_link_update,
+	.stats_get              = tap_stats_get,
+	.stats_reset            = tap_stats_reset,
+};
+
+#define RTE_USE_GLOBAL_DATA	0x0000
+#define RTE_USE_PRIVATE_DATA	0x0001
+
+/*
+ * Assign MAC addresses to both ends of the TAP device.
+ *
+ * The host side of the interface is set, through SIOCSIFHWADDR on fd, to
+ * 'T','a','p','-',<port id>,<numa node>. The address returned in addr for
+ * the DPDK side is 'd','n','e','t',<port id>,<numa node>, so that the two
+ * ends differ.
+ *
+ * Returns 0 on success, -1 on a bad argument or a failing ioctl().
+ */
+static int
+pmd_mac_address(int fd, struct rte_eth_dev *dev, struct ether_addr *addr)
+{
+	struct ifreq ifr;
+
+	/* 0 is a valid descriptor; only negative values are rejected */
+	if ((fd < 0) || (dev == NULL) || (addr == NULL))
+		return -1;
+
+	memset(&ifr, 0, sizeof(ifr));
+
+	/* the current address is fetched first; its bytes are replaced below */
+	if (ioctl(fd, SIOCGIFHWADDR, &ifr) == -1) {
+		RTE_LOG(ERR, PMD, "ioctl failed (SIOCGIFHWADDR) (%s)\n",
+			ifr.ifr_name);
+		return -1;
+	}
+
+	/* Set the host based MAC address to this special MAC format */
+	ifr.ifr_hwaddr.sa_data[0] = 'T';
+	ifr.ifr_hwaddr.sa_data[1] = 'a';
+	ifr.ifr_hwaddr.sa_data[2] = 'p';
+	ifr.ifr_hwaddr.sa_data[3] = '-';
+	ifr.ifr_hwaddr.sa_data[4] = dev->data->port_id;
+	ifr.ifr_hwaddr.sa_data[5] = dev->data->numa_node;
+	if (ioctl(fd, SIOCSIFHWADDR, &ifr) == -1) {
+		RTE_LOG(ERR, PMD, "%s: ioctl failed (SIOCSIFHWADDR) (%s)\n",
+			dev->data->name, ifr.ifr_name);
+		return -1;
+	}
+
+	/*
+	 * Set the local application MAC address, needs to be different than
+	 * the host based MAC address.
+	 */
+	ifr.ifr_hwaddr.sa_data[0] = 'd';
+	ifr.ifr_hwaddr.sa_data[1] = 'n';
+	ifr.ifr_hwaddr.sa_data[2] = 'e';
+	ifr.ifr_hwaddr.sa_data[3] = 't';
+	ifr.ifr_hwaddr.sa_data[4] = dev->data->port_id;
+	ifr.ifr_hwaddr.sa_data[5] = dev->data->numa_node;
+	memcpy(addr, ifr.ifr_hwaddr.sa_data, ETH_ALEN);
+
+	return 0;
+}
+
+/*
+ * Allocate an ethdev entry called name together with its driver private area.
+ *
+ * internal_size bytes of zeroed memory are allocated on the caller's socket
+ * and returned through internals. With RTE_USE_PRIVATE_DATA in flag the
+ * rte_eth_dev_data is additionally copied into a private allocation which
+ * then replaces dev->data; otherwise the data area handed out by
+ * rte_eth_dev_allocate() is used as is.
+ *
+ * Both allocations are made before dev->data is replaced, so a failure never
+ * leaves the ethdev pointing at freed memory.
+ *
+ * Returns 0 on success with *eth_dev and *internals set, -1 on failure.
+ */
+static int
+rte_eth_dev_create(const char *name,
+		   struct rte_eth_dev **eth_dev,
+		   const struct eth_dev_ops *dev_ops,
+		   void **internals, size_t internal_size,
+		   uint16_t flag)
+{
+	char buff[RTE_ETH_NAME_MAX_LEN];
+	int numa_node = rte_socket_id();
+	struct rte_eth_dev *dev = NULL;
+	struct rte_eth_dev_data *data = NULL;
+	void *priv = NULL;
+
+	if ((name == NULL) || (eth_dev == NULL) || (dev_ops == NULL) ||
+	    (internals == NULL) || (internal_size == 0)) {
+		RTE_PMD_DEBUG_TRACE("Parameters are invalid\n");
+		return -1;
+	}
+
+	dev = rte_eth_dev_allocate(name);
+	if (dev == NULL) {
+		/* nothing has been allocated yet, so there is nothing to undo */
+		RTE_PMD_DEBUG_TRACE("rte_eth_dev_allocate failed for %s\n",
+				    name);
+		return -1;
+	}
+
+	snprintf(buff, sizeof(buff), "I-%s-%d", name, numa_node);
+	priv = rte_zmalloc_socket(buff, internal_size, 0, numa_node);
+	if (priv == NULL) {
+		RTE_PMD_DEBUG_TRACE("Unable to allocate internal memory %zu\n",
+				    internal_size);
+		goto error;
+	}
+
+	if (flag & RTE_USE_PRIVATE_DATA) {
+		/*
+		 * Private copy of the eth_dev data: carries over the current
+		 * state (name, port id, ...) of the structure handed out by
+		 * rte_eth_dev_allocate().
+		 */
+		snprintf(buff, sizeof(buff), "D-%s-%d", name, numa_node);
+		data = rte_zmalloc_socket(buff, sizeof(struct rte_eth_dev_data),
+					  0, numa_node);
+		if (data == NULL) {
+			RTE_PMD_DEBUG_TRACE("%s: Unable to allocate memory\n",
+					    name);
+			goto error;
+		}
+		/* move the current state of the structure to the new one */
+		rte_memcpy(data, dev->data, sizeof(struct rte_eth_dev_data));
+		dev->data = data;	/* Override the current data pointer */
+	} else
+		data = dev->data;
+
+	/* Setup some default values */
+	dev->dev_ops = dev_ops;
+	dev->driver = NULL;
+	data->dev_private = priv;
+	data->dev_flags = RTE_ETH_DEV_DETACHABLE;
+	data->kdrv = RTE_KDRV_NONE;
+	data->numa_node = numa_node;
+
+	*eth_dev = dev;
+	*internals = priv;
+
+	return 0;
+
+error:
+	/* dev->data has not been replaced on any path that gets here */
+	rte_free(priv);
+	rte_eth_dev_release_port(dev);
+
+	return -1;
+}
+
+/*
+ * Create the ethdev for one TAP port and open its first TAP descriptor.
+ *
+ * The ethdev is allocated under tap->name with a private rte_eth_dev_data,
+ * all queue descriptors are preset to -1, the first descriptor is opened with
+ * tun_alloc() and stored for queue 0, and the MAC addresses are assigned.
+ * name is only used for logging.
+ *
+ * On failure everything acquired so far (descriptor, private data, internals,
+ * port) is released again.
+ *
+ * Returns 0 on success with *eth_dev and *internals set, -1 on failure.
+ */
+static int
+pmd_init_internals(const char *name, struct tap_info *tap,
+		   struct pmd_internals **internals,
+		   struct rte_eth_dev **eth_dev)
+{
+	struct rte_eth_dev *dev = NULL;
+	struct pmd_internals *internal = NULL;
+	struct rte_eth_dev_data *data = NULL;
+	int ret, i, fd = -1;
+
+	RTE_LOG(INFO, PMD,
+		"%s: Create TUN/TAP Ethernet device with %d queues on numa %u\n",
+		name, RTE_PMD_TAP_MAX_QUEUES, rte_socket_id());
+
+	pmd_link.link_speed = tap->speed;
+
+	ret = rte_eth_dev_create(tap->name, &dev, &ops,
+				 (void **)&internal, sizeof(struct pmd_internals),
+				 RTE_USE_PRIVATE_DATA);
+	if (ret < 0)
+		return -1;
+
+	/* private copy made because of RTE_USE_PRIVATE_DATA; ours to free */
+	data = dev->data;
+
+	snprintf(internal->name, sizeof(internal->name), "%s", tap->name);
+
+	internal->nb_queues = RTE_PMD_TAP_MAX_QUEUES;
+
+	/* Presetup the fds to -1 as being not working */
+	for (i = 0; i < RTE_PMD_TAP_MAX_QUEUES; i++) {
+		internal->fds[i] = -1;
+		internal->rxq[i].fd = -1;
+		internal->txq[i].fd = -1;
+	}
+
+	/* Create the first Tap device */
+	fd = tun_alloc(data->name);
+	if (fd < 0) {
+		RTE_LOG(ERR, PMD, "tun_alloc(%s) failed\n", data->name);
+		goto error;
+	}
+
+	/* Take the TUN/TAP fd and place in the first location */
+	internal->rxq[0].fd = fd;
+	internal->txq[0].fd = fd;
+	internal->fds[0] = fd;
+
+	if (pmd_mac_address(fd, dev, &internal->eth_addr) < 0)
+		goto error;
+
+	data->dev_link = pmd_link;
+	data->mac_addrs = &internal->eth_addr;
+
+	data->nb_rx_queues = (uint16_t)internal->nb_queues;
+	data->nb_tx_queues = (uint16_t)internal->nb_queues;
+	data->drv_name = drivername;
+
+	*eth_dev = dev;
+	*internals = internal;
+
+	return 0;
+
+error:
+	/* undo in reverse order; fd is still -1 when tun_alloc() failed */
+	if (fd >= 0)
+		close(fd);
+	rte_eth_dev_release_port(dev);
+	rte_free(internal);
+	rte_free(data);
+
+	return -1;
+}
+
+/*
+ * Create one TAP ethdev from the parsed settings in tap and install the
+ * RX/TX burst callbacks on it.
+ *
+ * Returns 0 on success, -1 when pmd_init_internals() fails.
+ */
+static int
+eth_dev_tap_create(const char *name, struct tap_info *tap)
+{
+	struct pmd_internals *internals = NULL;
+	struct rte_eth_dev *eth_dev = NULL;
+
+	if (pmd_init_internals(name, tap, &internals, &eth_dev) < 0)
+		return -1;
+
+	eth_dev->rx_pkt_burst = pmd_rx_burst;
+	eth_dev->tx_pkt_burst = pmd_tx_burst;
+
+	return 0;
+}
+
+/*
+ * kvargs handler for the "iface" argument.
+ *
+ * Copies value into the name field of the struct tap_info passed through
+ * extra_args. With a NULL value the default name "dtap<N>" is written, where
+ * N is tap_unit - 1 (the caller has already incremented tap_unit for this
+ * device). Always returns 0.
+ */
+static int
+set_interface_name(const char *key __rte_unused,
+		   const char *value,
+		   void *extra_args)
+{
+	struct tap_info *tap = (struct tap_info *)extra_args;
+
+	if (value)
+		snprintf(tap->name, sizeof(tap->name), "%s", value);
+	else
+		snprintf(tap->name, sizeof(tap->name), "dtap%d", (tap_unit - 1));
+
+	return 0;
+}
+
+/*
+ * kvargs handler for the "speed" argument.
+ *
+ * Parses value as a positive decimal number and stores it both in the global
+ * pmd_link template and in the struct tap_info passed through extra_args.
+ * A NULL value selects the default of ETH_SPEED_NUM_10G.
+ *
+ * Returns 0 on success, -1 when value is not a valid positive integer.
+ */
+static int
+set_interface_speed(const char *key __rte_unused,
+		    const char *value,
+		    void *extra_args)
+{
+	struct tap_info *tap = (struct tap_info *)extra_args;
+	long speed = ETH_SPEED_NUM_10G;
+	char *end = NULL;
+
+	if (value) {
+		speed = strtol(value, &end, 10);
+		/* reject empty, trailing-garbage, non-positive or too large input */
+		if ((end == value) || (*end != '\0') ||
+		    (speed <= 0) || (speed != (long)(int)speed)) {
+			RTE_LOG(ERR, PMD, "Invalid speed value '%s'\n", value);
+			return -1;
+		}
+	}
+
+	pmd_link.link_speed = (int)speed;
+	tap->speed = pmd_link.link_speed;
+
+	return 0;
+}
+
+/*
+ * Open a TAP interface device.
+ *
+ * Starts from the defaults (name "dtap<N>", speed ETH_SPEED_NUM_10G), applies
+ * the optional "speed" and "iface" arguments found in params and then creates
+ * the device. The device is created on every successful path, with or
+ * without arguments, and the parsed argument list is always released.
+ *
+ * Returns the result of eth_dev_tap_create(), or a negative value when an
+ * argument handler fails.
+ */
+static int
+rte_pmd_tap_devinit(const char *name, const char *params)
+{
+	int ret = 0;
+	struct rte_kvargs *kvlist = NULL;
+	struct tap_info tap_info;
+
+	/* Setup default values */
+	memset(&tap_info, 0, sizeof(tap_info));
+
+	tap_info.speed = ETH_SPEED_NUM_10G;
+	snprintf(tap_info.name, sizeof(tap_info.name), "dtap%d", tap_unit++);
+
+	if ((params == NULL) || (params[0] == '\0')) {
+		RTE_LOG(INFO, PMD, "Initializing pmd_tap for %s\n", name);
+	} else {
+		RTE_LOG(INFO, PMD, "Initialize %s with params (%s)\n",
+			name, params);
+		/* a failed parse leaves kvlist NULL and the defaults in place */
+		kvlist = rte_kvargs_parse(params, valid_arguments);
+	}
+
+	if (kvlist) {
+		if (rte_kvargs_count(kvlist, ETH_TAP_SPEED_ARG) == 1) {
+			ret = rte_kvargs_process(kvlist, ETH_TAP_SPEED_ARG,
+						 &set_interface_speed,
+						 &tap_info);
+			if (ret < 0)
+				goto leave;
+		}
+
+		if (rte_kvargs_count(kvlist, ETH_TAP_IFACE_ARG) == 1) {
+			ret = rte_kvargs_process(kvlist, ETH_TAP_IFACE_ARG,
+						 &set_interface_name,
+						 &tap_info);
+			if (ret < 0)
+				goto leave;
+		}
+	}
+
+	ret = eth_dev_tap_create(name, &tap_info);
+
+leave:
+	if (kvlist)
+		rte_kvargs_free(kvlist);
+
+	if (ret < 0)
+		RTE_LOG(INFO, PMD, "Failed to create pmd_tap for %s\n", name);
+
+	return ret;
+}
+
+/*
+ * Detach a TAP device.
+ *
+ * Looks the ethdev up by name, closes every descriptor recorded in fds[],
+ * frees the driver private area and the rte_eth_dev_data, and releases the
+ * port. A NULL or unknown name is not an error. Always returns 0.
+ */
+static int
+rte_pmd_tap_devuninit(const char *name)
+{
+	struct rte_eth_dev *port;
+	struct pmd_internals *pmd;
+	int q;
+
+	RTE_LOG(INFO, PMD, "Closing TUN/TAP Ethernet device on numa %u\n",
+		rte_socket_id());
+
+	/* find the ethdev entry; nothing to do without a name or a match */
+	port = (name != NULL) ? rte_eth_dev_allocated(name) : NULL;
+	if (port == NULL)
+		return 0;
+
+	pmd = port->data->dev_private;
+	for (q = 0; q < pmd->nb_queues; q++) {
+		if (pmd->fds[q] == -1)
+			continue;
+		close(pmd->fds[q]);
+	}
+
+	rte_free(pmd);
+	rte_free(port->data);
+
+	rte_eth_dev_release_port(port);
+
+	return 0;
+}
+
+static struct rte_vdev_driver pmd_tap_drv = {
+	.init = rte_pmd_tap_devinit,
+	.uninit = rte_pmd_tap_devuninit,
+};
+
+DRIVER_REGISTER_VDEV(eth_tap, pmd_tap_drv);
+DRIVER_REGISTER_PARAM_STRING(eth_tap,
+			     "iface=<string>,speed=N");
diff --git a/drivers/net/tap/rte_pmd_tap_version.map b/drivers/net/tap/rte_pmd_tap_version.map
new file mode 100644
index 0000000..61463bf
--- /dev/null
+++ b/drivers/net/tap/rte_pmd_tap_version.map
@@ -0,0 +1,4 @@
+DPDK_16.11 {
+
+	local: *;
+};
diff --git a/mk/rte.app.mk b/mk/rte.app.mk
index 1a0095b..bd1d10f 100644
--- a/mk/rte.app.mk
+++ b/mk/rte.app.mk
@@ -129,6 +129,7 @@ ifeq ($(CONFIG_RTE_LIBRTE_VHOST),y)
 _LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_VHOST)      += -lrte_pmd_vhost
 endif # $(CONFIG_RTE_LIBRTE_VHOST)
 _LDLIBS-$(CONFIG_RTE_LIBRTE_VMXNET3_PMD)    += -lrte_pmd_vmxnet3_uio
+_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_TAP)        += -lrte_pmd_tap
 
 ifeq ($(CONFIG_RTE_LIBRTE_CRYPTODEV),y)
 _LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_AESNI_MB)   += -lrte_pmd_aesni_mb
-- 
2.8.0.GIT

^ permalink raw reply related	[flat|nested] 59+ messages in thread

* Re: [PATCH v4] drivers/net:new PMD using tun/tap host interface
  2016-10-04 14:45     ` [PATCH v4] " Keith Wiles
@ 2016-10-11  9:40       ` Ferruh Yigit
  2016-10-11 11:30       ` Michał Mirosław
                         ` (3 subsequent siblings)
  4 siblings, 0 replies; 59+ messages in thread
From: Ferruh Yigit @ 2016-10-11  9:40 UTC (permalink / raw)
  To: Keith Wiles, dev; +Cc: pmatilai, yuanhan.liu

Hi Keith,

On 10/4/2016 3:45 PM, Keith Wiles wrote:
> The rte_eth_tap.c PMD creates a device using TUN/TAP interfaces
> on the local host. The PMD allows for DPDK and the host to
> communicate using a raw device interface on the host and in
> the DPDK application. The device created is a Tap device with
> a L2 packet header.
> 
> v4 - merge with latest driver changes
> v3 - fix includes by removing ifdef for other type besides Linux.
>      Fix the copyright notice in the Makefile
> v2 - merge all of the patches into one patch.
>      Fix a typo on naming the tap device.
>      Update the maintainers list
> 
> Signed-off-by: Keith Wiles <keith.wiles@intel.com>
> ---
>  MAINTAINERS                             |   5 +
>  config/common_linuxapp                  |   2 +
>  doc/guides/nics/tap.rst                 |  84 ++++
>  drivers/net/Makefile                    |   1 +
>  drivers/net/tap/Makefile                |  57 +++
>  drivers/net/tap/rte_eth_tap.c           | 866 ++++++++++++++++++++++++++++++++
>  drivers/net/tap/rte_pmd_tap_version.map |   4 +
>  mk/rte.app.mk                           |   1 +
>  8 files changed, 1020 insertions(+)
>  create mode 100644 doc/guides/nics/tap.rst
>  create mode 100644 drivers/net/tap/Makefile
>  create mode 100644 drivers/net/tap/rte_eth_tap.c
>  create mode 100644 drivers/net/tap/rte_pmd_tap_version.map

This patch needs to be rebased on top of the latest next-net; .init and
.uninit are no longer used.

Also, the patch gives a set of checkpatch warnings, FYI.

Thanks,
ferruh

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [PATCH v4] drivers/net:new PMD using tun/tap host interface
  2016-10-04 14:45     ` [PATCH v4] " Keith Wiles
  2016-10-11  9:40       ` Ferruh Yigit
@ 2016-10-11 11:30       ` Michał Mirosław
  2016-10-11 20:56         ` Wiles, Keith
  2016-10-11 11:49       ` Ferruh Yigit
                         ` (2 subsequent siblings)
  4 siblings, 1 reply; 59+ messages in thread
From: Michał Mirosław @ 2016-10-11 11:30 UTC (permalink / raw)
  To: Keith Wiles; +Cc: dev, pmatilai, yuanhan.liu

2016-10-04 16:45 GMT+02:00, Keith Wiles <keith.wiles@intel.com>:
> The rte_eth_tap.c PMD creates a device using TUN/TAP interfaces
> on the local host. The PMD allows for DPDK and the host to
> communicate using a raw device interface on the host and in
> the DPDK application. The device created is a Tap device with
> a L2 packet header.
[...]
> +static uint16_t
> +pmd_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
> +{
> +	int len, n;
> +	struct rte_mbuf *mbuf;
> +	struct rx_queue *rxq = queue;
> +	struct pollfd pfd;
> +	uint16_t num_rx;
> +	unsigned long num_rx_bytes = 0;
> +
> +	pfd.events = POLLIN;
> +	pfd.fd = rxq->fd;
> +	for (num_rx = 0; num_rx < nb_pkts; ) {
> +		n = poll(&pfd, 1, 0);
> +
> +		if (n <= 0)
> +			break;
> +

Considering that syscalls are rather expensive, it would be cheaper to
allocate an mbuf here and free it when read() returns -1 instead of
calling poll() to check whether a packet is waiting. This way you
save a syscall per packet and replace one syscall with one mbuf free
per poll.

Best Regards,
Michał Mirosław

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [PATCH v4] drivers/net:new PMD using tun/tap host interface
  2016-10-04 14:45     ` [PATCH v4] " Keith Wiles
  2016-10-11  9:40       ` Ferruh Yigit
  2016-10-11 11:30       ` Michał Mirosław
@ 2016-10-11 11:49       ` Ferruh Yigit
  2016-10-11 21:07         ` Wiles, Keith
  2016-10-11 12:28       ` Ferruh Yigit
  2016-10-11 21:51       ` [PATCH v5] " Keith Wiles
  4 siblings, 1 reply; 59+ messages in thread
From: Ferruh Yigit @ 2016-10-11 11:49 UTC (permalink / raw)
  To: Keith Wiles, dev; +Cc: pmatilai, yuanhan.liu

On 10/4/2016 3:45 PM, Keith Wiles wrote:
> The rte_eth_tap.c PMD creates a device using TUN/TAP interfaces
> on the local host. The PMD allows for DPDK and the host to
> communicate using a raw device interface on the host and in
> the DPDK application. The device created is a Tap device with
> a L2 packet header.
> 
> v4 - merge with latest driver changes
> v3 - fix includes by removing ifdef for other type besides Linux.
>      Fix the copyright notice in the Makefile
> v2 - merge all of the patches into one patch.
>      Fix a typo on naming the tap device.
>      Update the maintainers list
> 
> Signed-off-by: Keith Wiles <keith.wiles@intel.com>
> ---
>  MAINTAINERS                             |   5 +
>  config/common_linuxapp                  |   2 +
>  doc/guides/nics/tap.rst                 |  84 ++++
>  drivers/net/Makefile                    |   1 +
>  drivers/net/tap/Makefile                |  57 +++
>  drivers/net/tap/rte_eth_tap.c           | 866 ++++++++++++++++++++++++++++++++
>  drivers/net/tap/rte_pmd_tap_version.map |   4 +
>  mk/rte.app.mk                           |   1 +
>  8 files changed, 1020 insertions(+)
>  create mode 100644 doc/guides/nics/tap.rst
>  create mode 100644 drivers/net/tap/Makefile
>  create mode 100644 drivers/net/tap/rte_eth_tap.c
>  create mode 100644 drivers/net/tap/rte_pmd_tap_version.map
> 
<>
> diff --git a/config/common_linuxapp b/config/common_linuxapp
> index 2483dfa..59a2053 100644
> --- a/config/common_linuxapp
> +++ b/config/common_linuxapp
> @@ -44,3 +44,5 @@ CONFIG_RTE_LIBRTE_PMD_VHOST=y
>  CONFIG_RTE_LIBRTE_PMD_AF_PACKET=y
>  CONFIG_RTE_LIBRTE_POWER=y
>  CONFIG_RTE_VIRTIO_USER=y
> +CONFIG_RTE_LIBRTE_PMD_TAP=y

According to the existing config items, the default value of a config option
should go into config/common_base, and the environment-specific config file
overrides it if required.
So this option also needs to be added to config/common_base, disabled
by default.

> +CONFIG_RTE_PMD_TAP_MAX_QUEUES=32

Does the maximum number of queues really need to be a config option? I assume
that in the normal use case the user won't need to change this and will use a
single queue; if that is true, what about moving this into the source code so
that the config file does not become more complex?

> diff --git a/doc/guides/nics/tap.rst b/doc/guides/nics/tap.rst

<...>

> +.. code-block:: console
> +
> +   The interfaced name can be changed by adding the iface=foo0
> +   e.g. --vedv=eth_tap,iface=foo0 --vdev=eth_tap,iface=foo1, ...

s/vedv/vdev
eth_tap needs to be net_tap as part of unifying device names work

<...>

> diff --git a/drivers/net/Makefile b/drivers/net/Makefile
> index bc93230..b4afa98 100644
> --- a/drivers/net/Makefile
> +++ b/drivers/net/Makefile
> @@ -55,6 +55,7 @@ DIRS-$(CONFIG_RTE_LIBRTE_THUNDERX_NICVF_PMD) += thunderx
>  DIRS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio
>  DIRS-$(CONFIG_RTE_LIBRTE_VMXNET3_PMD) += vmxnet3
>  DIRS-$(CONFIG_RTE_LIBRTE_PMD_XENVIRT) += xenvirt
> +DIRS-$(CONFIG_RTE_LIBRTE_PMD_TAP) += tap

The rest of the PMDs are sorted alphabetically; please do the same.

>  
>  ifeq ($(CONFIG_RTE_LIBRTE_VHOST),y)
>  DIRS-$(CONFIG_RTE_LIBRTE_PMD_VHOST) += vhost

<...>

> diff --git a/drivers/net/tap/rte_eth_tap.c b/drivers/net/tap/rte_eth_tap.c

<...>

> +
> +static const char *drivername = "Tap PMD";
> +static int tap_unit = 0;

No need to initialize to zero.

<...>

> +
> +struct pmd_internals {
> +	char name[RTE_ETH_NAME_MAX_LEN];	/* Internal Tap device name */
> +	uint16_t nb_queues;			/* Number of queues supported */
> +	uint16_t pad0;

Why this padding? Is it reserved?

> +	struct ether_addr eth_addr;	/* Mac address of the device port */
> +
> +	int if_index;			/* IF_INDEX for the port */
> +	int fds[RTE_PMD_TAP_MAX_QUEUES]; /* List of all file descriptors */
> +
> +	struct rx_queue rxq[RTE_PMD_TAP_MAX_QUEUES];	/* List of RX queues */
> +	struct tx_queue txq[RTE_PMD_TAP_MAX_QUEUES];	/* List of TX queues */
> +};
> +
> +/*
> + * Tun/Tap allocation routine
> + *
> + * name is the number of the interface to use, unless NULL to take the host
> + * supplied name.
> + */
> +static int
> +tun_alloc(char * name)

char *name

<...>

> +
> +	/* Always set the fiile descriptor to non-blocking */

s/fiile/file

> +	if (fcntl(fd, F_SETFL, O_NONBLOCK) < 0) {
> +		RTE_LOG(ERR, PMD, "Unable to set to nonblocking\n");
> +		perror("F_SETFL, NONBLOCK");
> +		goto error;
> +	}
> +
> +	/* If the name is different that new name as default */
> +	if (name && strcmp(name, ifr.ifr_name))
> +		strcpy(name, ifr.ifr_name);
What about more secure copy?

> +
> +	return fd;
> +
> +error:
> +	if (fd > 0)
> +		close(fd);
> +	return -1;
> +}
> +

<...>

> +
> +static void
> +tap_dev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
> +{
> +	struct pmd_internals *internals = dev->data->dev_private;
> +
> +	dev_info->driver_name = drivername;
> +	dev_info->if_index = internals->if_index;
> +	dev_info->max_mac_addrs = 1;
> +	dev_info->max_rx_pktlen = (uint32_t)ETHER_MAX_VLAN_FRAME_LEN;
> +	dev_info->max_rx_queues = (uint16_t)internals->nb_queues;
> +	dev_info->max_tx_queues = (uint16_t)internals->nb_queues;
Casting to uint16_t is not required; it is already uint16_t.

> +	dev_info->min_rx_bufsize = 0;
> +	dev_info->pci_dev = NULL;
> +}
> +
> +static void
> +tap_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *igb_stats)
igb_stats?

> +{
> +	unsigned i, imax;
> +	unsigned long rx_total = 0, tx_total = 0, tx_err_total = 0;
> +	unsigned long rx_bytes_total = 0, tx_bytes_total = 0;
> +	const struct pmd_internals *internal = dev->data->dev_private;
> +
> +	imax = (internal->nb_queues < RTE_ETHDEV_QUEUE_STAT_CNTRS) ?
> +		internal->nb_queues : RTE_ETHDEV_QUEUE_STAT_CNTRS;
> +
> +	for (i = 0; i < imax; i++) {
> +		igb_stats->q_ipackets[i] = internal->rxq[i].stats.ipackets;
> +		igb_stats->q_ibytes[i] = internal->rxq[i].stats.ibytes;
> +		rx_total += igb_stats->q_ipackets[i];
> +		rx_bytes_total += igb_stats->q_ibytes[i];
> +	}
> +
> +	imax = (internal->nb_queues < RTE_ETHDEV_QUEUE_STAT_CNTRS) ?
> +		internal->nb_queues : RTE_ETHDEV_QUEUE_STAT_CNTRS;
Do we need to duplicate imax calculation?


> +
> +	for (i = 0; i < imax; i++) {
> +		igb_stats->q_opackets[i] = internal->txq[i].stats.opackets;
> +		igb_stats->q_errors[i] = internal->txq[i].stats.errs;
> +		igb_stats->q_obytes[i] = internal->txq[i].stats.obytes;
> +		tx_total += igb_stats->q_opackets[i];
> +		tx_err_total += igb_stats->q_errors[i];
> +		tx_bytes_total += igb_stats->q_obytes[i];
> +	}
> +
> +	igb_stats->ipackets = rx_total;
> +	igb_stats->ibytes = rx_bytes_total;
> +	igb_stats->opackets = tx_total;
> +	igb_stats->oerrors = tx_err_total;
> +	igb_stats->obytes = tx_bytes_total;
> +}
> +

<...>

> +
> +static int
> +rte_eth_dev_create(const char *name,
> +		   struct rte_eth_dev **eth_dev,
> +		   const struct eth_dev_ops *dev_ops,
> +		   void **internals, size_t internal_size,
> +		   uint16_t flag)
> +{
> +	char buff[RTE_ETH_NAME_MAX_LEN];
> +	int numa_node = rte_socket_id();
> +	struct rte_eth_dev *dev = NULL;
> +	struct rte_eth_dev_data *data = NULL;
> +	void *priv = NULL;
> +
> +	if ((name == NULL) || (eth_dev == NULL) || (dev_ops == NULL) ||
> +	    (internals == NULL) || (internal_size == 0)) {
> +		RTE_PMD_DEBUG_TRACE("Paramters are invalid\n");
> +		return -1;
> +	}
> +
> +	dev = rte_eth_dev_allocate(name);
> +	if (dev == NULL) {
> +		RTE_PMD_DEBUG_TRACE("%s: rte_eth_dev_allocate failed for %s\n",
> +				    name, buff);
> +		goto error;
> +	}
> +
> +	if (flag & RTE_USE_PRIVATE_DATA) {

You may need to save this flag value somewhere in internals, to decide
how to free data later.

> +		/*
> +		 * now do all data allocation - for eth_dev structure, dummy
> +		 * pci driver and internal (private) data
> +		 */
> +		snprintf(buff, sizeof(buff), "D-%s-%d", name, numa_node);
> +		data = rte_zmalloc_socket(buff, sizeof(struct rte_eth_dev_data),
> +					  0, numa_node);
> +		if (data == NULL) {
> +			RTE_PMD_DEBUG_TRACE("%s: Unable to allocate memory\n",
> +					    name);
> +			goto error;
> +		}
> +		/* move the current state of the structure to the new one */
> +		rte_memcpy(data, dev->data, sizeof(struct rte_eth_dev_data));
Why do we need to copy, trying to preserve which data?

> +		dev->data = data;	/* Override the current data pointer */
> +	} else
> +		data = dev->data;
> +
> +	snprintf(buff, sizeof(buff), "I-%s-%d", name, numa_node);
> +	priv = rte_zmalloc_socket(buff, internal_size, 0, numa_node);
> +	if (priv == NULL) {
> +		RTE_PMD_DEBUG_TRACE("Unable to allocate internal memory %lu\n",
> +				    internal_size);
> +		goto error;
> +	}
> +
> +	/* Setup some default values */
> +	dev->dev_ops = dev_ops;
> +	data->dev_private = priv;

> +	data->port_id = dev->data->port_id;
> +	memmove(data->name, dev->data->name, strlen(dev->data->name));
These two assignments are useless; they need to be done before the
"dev->data = data" assignment.

> +
> +	dev->driver = NULL;
> +	data->dev_flags = RTE_ETH_DEV_DETACHABLE;
> +	data->kdrv = RTE_KDRV_NONE;
> +	data->numa_node = numa_node;
> +
> +	*eth_dev = dev;
> +	*internals = priv;
> +
> +	return 0;
> +error:
> +	rte_free(priv);
> +
> +	if (flag & RTE_USE_PRIVATE_DATA)
> +		rte_free(data);
> +
> +	rte_eth_dev_release_port(dev);
> +
> +	return -1;
> +}
> +
> +static int
> +pmd_init_internals(const char *name, struct tap_info *tap,
> +		   struct pmd_internals **internals,
> +		   struct rte_eth_dev **eth_dev)
> +{
> +	struct rte_eth_dev *dev = NULL;
> +	struct pmd_internals *internal = NULL;
> +	struct rte_eth_dev_data *data = NULL;
> +	int ret, i, fd = -1;
> +
> +	RTE_LOG(INFO, PMD,
> +		"%s: Create TUN/TAP Ethernet device with %d queues on numa %u\n",
> +		name, RTE_PMD_TAP_MAX_QUEUES, rte_socket_id());
> +
> +	pmd_link.link_speed = tap->speed;
> +
> +	ret = rte_eth_dev_create(tap->name, &dev, &ops,
> +				 (void **)&internal, sizeof(struct pmd_internals),
Why rte_eth_dev_create() get "void **internals" which requires casting,
but not "struct pmd_internals **internals" ?

> +				 RTE_USE_PRIVATE_DATA);
> +	if (ret < 0)
> +		return -1;
> +
> +	strncpy(internal->name, tap->name, sizeof(internal->name));
> +
> +	internal->nb_queues = RTE_PMD_TAP_MAX_QUEUES;
> +
> +	/* Create the first Tap device */
> +	if ((fd = tun_alloc(dev->data->name)) < 0) {
> +		RTE_LOG(ERR, PMD, "tun_alloc(%s) failed\n", dev->data->name);
> +		rte_free(internal);
rte_free(dev->data); ?
But needs to check RTE_USE_PRIVATE_DATA ..

> +		rte_eth_dev_release_port(dev);
> +		return -1;
> +	}
> +
> +	/* Presetup the fds to -1 as being not working */
> +	for(i = 0; i < RTE_PMD_TAP_MAX_QUEUES; i++) {
> +		internal->fds[i] = -1;
> +		internal->rxq[i].fd = -1;
> +		internal->txq[i].fd = -1;
> +	}
> +
> +	/* Take the TUN/TAP fd and place in the first location */
> +	internal->rxq[0].fd = fd;
> +	internal->txq[0].fd = fd;
> +	internal->fds[0] = fd;
> +
> +	if (pmd_mac_address(fd, dev, &internal->eth_addr) < 0) {
> +		rte_free(internal);
rte_free(dev->data); ?

> +		rte_eth_dev_release_port(dev);
> +		return -1;
> +	}
> +
> +	data = dev->data;
> +
> +	data->dev_link = pmd_link;
> +	data->mac_addrs = &internal->eth_addr;
> +
> +	data->nb_rx_queues = (uint16_t)internal->nb_queues;
> +	data->nb_tx_queues = (uint16_t)internal->nb_queues;
no cast required.

> +	data->drv_name = drivername;
> +
> +	*eth_dev = dev;
> +	*internals = internal;
> +
> +	return 0;
> +}
> +

<...>

> +
> +static int
> +set_interface_speed(const char *key __rte_unused,
> +		    const char *value,
> +		    void *extra_args __rte_unused)
need to drop  __rte_unused for extra_args

> +{
> +	struct tap_info *tap = (struct tap_info *)extra_args;
> +
> +	pmd_link.link_speed = (value) ? atoi(value) : ETH_SPEED_NUM_10G;
> +	tap->speed = pmd_link.link_speed;
> +
> +	return 0;
> +}
> +
> +/*
> + * Open a TAP interface device.
> + */
> +static int
> +rte_pmd_tap_devinit(const char *name, const char *params)
> +{
> +	int ret = 0;
> +	struct rte_kvargs *kvlist;
> +	struct tap_info tap_info;
> +
> +	/* Setup default values */
> +	memset(&tap_info, 0, sizeof(tap_info));
> +
> +	tap_info.speed = ETH_SPEED_NUM_10G;
> +	snprintf(tap_info.name, sizeof(tap_info.name), "dtap%d", tap_unit++);
What about extracting iface name "dtap" into a macro to make it more
visible.

> +
> +	if ((params == NULL) || (params[0] == '\0')) {
> +		RTE_LOG(INFO, PMD, "Initializing pmd_tap for %s\n", name);
> +
> +		ret = eth_dev_tap_create(name, &tap_info);
This "name" is not used at all (except in RTE_LOG); tap->name is used
for the interface name instead, so why carry this variable around?

> +		goto leave;
> +	}
> +
> +	RTE_LOG(INFO, PMD, "Initialize %s with params (%s)\n", name, params);
> +
> +	kvlist = rte_kvargs_parse(params, valid_arguments);
> +	if (!kvlist) {
> +		ret = eth_dev_tap_create(name, &tap_info);
> +		goto leave;
> +	}
> +
> +	if (rte_kvargs_count(kvlist, ETH_TAP_SPEED_ARG) == 1) {
> +		ret = rte_kvargs_process(kvlist, ETH_TAP_SPEED_ARG,
> +					 &set_interface_speed, &tap_info);
> +		if (ret < 0)
> +			goto leave;
> +	} else
> +		set_interface_speed(NULL, NULL, &tap_info);
This call is redundant, tap_info already has default speed value set.

> +
> +	if (rte_kvargs_count(kvlist, ETH_TAP_IFACE_ARG) == 1) {
> +		ret = rte_kvargs_process(kvlist, ETH_TAP_IFACE_ARG,
> +					 &set_interface_name, &tap_info);
> +		if (ret < 0)
> +			goto leave;
> +	} else
> +		set_interface_name(NULL, NULL, (void *)&tap_info);
tap_info->name already set to default value (dtap%d), this call is not
required.

> +
> +	rte_kvargs_free(kvlist);
> +
> +leave:
> +	if (ret == -1)
> +		RTE_LOG(INFO, PMD, "Failed to create pmd_tap for %s\n", name);
> +
> +	return ret;
> +}
> +
> +/*
> + * detach a TAP device.
> + */
> +static int
> +rte_pmd_tap_devuninit(const char *name)
> +{
> +	struct rte_eth_dev *eth_dev = NULL;
> +	struct pmd_internals *internals;
> +	int i;
> +
> +	RTE_LOG(INFO, PMD, "Closing TUN/TAP Ethernet device on numa %u\n",
> +		rte_socket_id());
> +
> +	if (name == NULL)
This check is redundant, eal layer won't call this function with "name
== NULL"

> +		return 0;
> +
> +	/* find the ethdev entry */
> +	eth_dev = rte_eth_dev_allocated(name);
> +	if (eth_dev == NULL)
> +		return 0;
> +
> +	internals = eth_dev->data->dev_private;
> +	for (i = 0; i < internals->nb_queues; i++)
> +		if (internals->fds[i] != -1)
> +			close(internals->fds[i]);
> +
> +	rte_free(eth_dev->data->dev_private);
> +	rte_free(eth_dev->data);
data can be shared?
Don't we need a RTE_USE_PRIVATE_DATA flag check?

> +
> +	rte_eth_dev_release_port(eth_dev);
> +
> +	return 0;
> +}
> +
> +static struct rte_vdev_driver pmd_tap_drv = {
> +	.init = rte_pmd_tap_devinit,
> +	.uninit = rte_pmd_tap_devuninit,
> +};
> +
> +DRIVER_REGISTER_VDEV(eth_tap, pmd_tap_drv);
name convention is now: "net_tap"

> +DRIVER_REGISTER_PARAM_STRING(eth_tap,
> +			     "iface=<string>,speed=N");
> diff --git a/drivers/net/tap/rte_pmd_tap_version.map b/drivers/net/tap/rte_pmd_tap_version.map
> new file mode 100644
> index 0000000..61463bf
> --- /dev/null
> +++ b/drivers/net/tap/rte_pmd_tap_version.map
> @@ -0,0 +1,4 @@
> +DPDK_16.11 {
> +
> +	local: *;
> +};
> diff --git a/mk/rte.app.mk b/mk/rte.app.mk
> index 1a0095b..bd1d10f 100644
> --- a/mk/rte.app.mk
> +++ b/mk/rte.app.mk
> @@ -129,6 +129,7 @@ ifeq ($(CONFIG_RTE_LIBRTE_VHOST),y)
>  _LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_VHOST)      += -lrte_pmd_vhost
>  endif # $(CONFIG_RTE_LIBRTE_VHOST)
>  _LDLIBS-$(CONFIG_RTE_LIBRTE_VMXNET3_PMD)    += -lrte_pmd_vmxnet3_uio
> +_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_TAP)        += -lrte_pmd_tap
Please put this in alphabetical order.

>  
>  ifeq ($(CONFIG_RTE_LIBRTE_CRYPTODEV),y)
>  _LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_AESNI_MB)   += -lrte_pmd_aesni_mb
> 

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [PATCH v4] drivers/net:new PMD using tun/tap host interface
  2016-10-04 14:45     ` [PATCH v4] " Keith Wiles
                         ` (2 preceding siblings ...)
  2016-10-11 11:49       ` Ferruh Yigit
@ 2016-10-11 12:28       ` Ferruh Yigit
  2016-10-11 20:57         ` Wiles, Keith
  2016-10-11 21:51       ` [PATCH v5] " Keith Wiles
  4 siblings, 1 reply; 59+ messages in thread
From: Ferruh Yigit @ 2016-10-11 12:28 UTC (permalink / raw)
  To: Keith Wiles, dev; +Cc: pmatilai, yuanhan.liu

On 10/4/2016 3:45 PM, Keith Wiles wrote:
> +/*
> + * Open a TAP interface device.
> + */
> +static int
> +rte_pmd_tap_devinit(const char *name, const char *params)
> +{
> +	int ret = 0;
> +	struct rte_kvargs *kvlist;
> +	struct tap_info tap_info;
> +
> +	/* Setup default values */
> +	memset(&tap_info, 0, sizeof(tap_info));
> +
> +	tap_info.speed = ETH_SPEED_NUM_10G;
> +	snprintf(tap_info.name, sizeof(tap_info.name), "dtap%d", tap_unit++);
> +
> +	if ((params == NULL) || (params[0] == '\0')) {
> +		RTE_LOG(INFO, PMD, "Initializing pmd_tap for %s\n", name);
> +
> +		ret = eth_dev_tap_create(name, &tap_info);
> +		goto leave;
> +	}
> +
> +	RTE_LOG(INFO, PMD, "Initialize %s with params (%s)\n", name, params);
> +
> +	kvlist = rte_kvargs_parse(params, valid_arguments);
> +	if (!kvlist) {
> +		ret = eth_dev_tap_create(name, &tap_info);
> +		goto leave;
> +	}
> +
> +	if (rte_kvargs_count(kvlist, ETH_TAP_SPEED_ARG) == 1) {
> +		ret = rte_kvargs_process(kvlist, ETH_TAP_SPEED_ARG,
> +					 &set_interface_speed, &tap_info);
> +		if (ret < 0)
> +			goto leave;
> +	} else
> +		set_interface_speed(NULL, NULL, &tap_info);
> +
> +	if (rte_kvargs_count(kvlist, ETH_TAP_IFACE_ARG) == 1) {
> +		ret = rte_kvargs_process(kvlist, ETH_TAP_IFACE_ARG,
> +					 &set_interface_name, &tap_info);
> +		if (ret < 0)
> +			goto leave;
> +	} else
> +		set_interface_name(NULL, NULL, (void *)&tap_info);

Also there must be a eth_dev_tap_create() call after this point to use
tap_info struct with custom values, right?
"--vdev eth_tap0,iface=foo0" parameter shouldn't be working with this
code, right?

> +
> +	rte_kvargs_free(kvlist);
> +
> +leave:
> +	if (ret == -1)
> +		RTE_LOG(INFO, PMD, "Failed to create pmd_tap for %s\n", name);
> +
> +	return ret;
> +}

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [PATCH v4] drivers/net:new PMD using tun/tap host interface
  2016-10-11 11:30       ` Michał Mirosław
@ 2016-10-11 20:56         ` Wiles, Keith
  2016-10-12  8:14           ` Michał Mirosław
  0 siblings, 1 reply; 59+ messages in thread
From: Wiles, Keith @ 2016-10-11 20:56 UTC (permalink / raw)
  To: Michał Mirosław; +Cc: dev, pmatilai, yuanhan.liu


Regards,
Keith

> On Oct 11, 2016, at 6:30 AM, Michał Mirosław <mirqus@gmail.com> wrote:
> 
> 2016-10-04 16:45 GMT+02:00, Keith Wiles <keith.wiles@intel.com>:
>> The rte_eth_tap.c PMD creates a device using TUN/TAP interfaces
>> on the local host. The PMD allows for DPDK and the host to
>> communicate using a raw device interface on the host and in
>> the DPDK application. The device created is a Tap device with
>> a L2 packet header.
> [...]
>> +static uint16_t
>> +pmd_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
>> +{
>> +	int len, n;
>> +	struct rte_mbuf *mbuf;
>> +	struct rx_queue *rxq = queue;
>> +	struct pollfd pfd;
>> +	uint16_t num_rx;
>> +	unsigned long num_rx_bytes = 0;
>> +
>> +	pfd.events = POLLIN;
>> +	pfd.fd = rxq->fd;
>> +	for (num_rx = 0; num_rx < nb_pkts; ) {
>> +		n = poll(&pfd, 1, 0);
>> +
>> +		if (n <= 0)
>> +			break;
>> +
> 
> Considering that syscalls are rather expensive, it would be cheaper to
> allocate an mbuf here and free it when read() returns -1 instead of
> calling poll() to check whether a packet is waiting. This way you
> save a syscall per packet and replace one syscall with one mbuf free
> per poll.

I made this change, but saw no performance difference between the two methods. Removing poll seems reasonable as it is simpler. My guess is that the performance did not change because TAP is already so slow. Anyone wanting to use TAP as a high-performance interface is going to be surprised. I believe the best use case for the TAP interface is the control or exception path.

> 
> Best Regards,
> Michał Mirosław


^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [PATCH v4] drivers/net:new PMD using tun/tap host interface
  2016-10-11 12:28       ` Ferruh Yigit
@ 2016-10-11 20:57         ` Wiles, Keith
  0 siblings, 0 replies; 59+ messages in thread
From: Wiles, Keith @ 2016-10-11 20:57 UTC (permalink / raw)
  To: Yigit, Ferruh; +Cc: dev, pmatilai, yuanhan.liu


Regards,
Keith

> On Oct 11, 2016, at 7:28 AM, Yigit, Ferruh <ferruh.yigit@intel.com> wrote:
> 
> On 10/4/2016 3:45 PM, Keith Wiles wrote:
>> +/*
>> + * Open a TAP interface device.
>> + */
>> +static int
>> +rte_pmd_tap_devinit(const char *name, const char *params)
>> +{
>> +	int ret = 0;
>> +	struct rte_kvargs *kvlist;
>> +	struct tap_info tap_info;
>> +
>> +	/* Setup default values */
>> +	memset(&tap_info, 0, sizeof(tap_info));
>> +
>> +	tap_info.speed = ETH_SPEED_NUM_10G;
>> +	snprintf(tap_info.name, sizeof(tap_info.name), "dtap%d", tap_unit++);
>> +
>> +	if ((params == NULL) || (params[0] == '\0')) {
>> +		RTE_LOG(INFO, PMD, "Initializing pmd_tap for %s\n", name);
>> +
>> +		ret = eth_dev_tap_create(name, &tap_info);
>> +		goto leave;
>> +	}
>> +
>> +	RTE_LOG(INFO, PMD, "Initialize %s with params (%s)\n", name, params);
>> +
>> +	kvlist = rte_kvargs_parse(params, valid_arguments);
>> +	if (!kvlist) {
>> +		ret = eth_dev_tap_create(name, &tap_info);
>> +		goto leave;
>> +	}
>> +
>> +	if (rte_kvargs_count(kvlist, ETH_TAP_SPEED_ARG) == 1) {
>> +		ret = rte_kvargs_process(kvlist, ETH_TAP_SPEED_ARG,
>> +					 &set_interface_speed, &tap_info);
>> +		if (ret < 0)
>> +			goto leave;
>> +	} else
>> +		set_interface_speed(NULL, NULL, &tap_info);
>> +
>> +	if (rte_kvargs_count(kvlist, ETH_TAP_IFACE_ARG) == 1) {
>> +		ret = rte_kvargs_process(kvlist, ETH_TAP_IFACE_ARG,
>> +					 &set_interface_name, &tap_info);
>> +		if (ret < 0)
>> +			goto leave;
>> +	} else
>> +		set_interface_name(NULL, NULL, (void *)&tap_info);
> 
> Also there must be a eth_dev_tap_create() call after this point to use
> tap_info struct with custom values, right?
> "--vdev eth_tap0,iface=foo0" parameter shouldn't be working with this
> code, right?

Removed the extra code.

> 
>> +
>> +	rte_kvargs_free(kvlist);
>> +
>> +leave:
>> +	if (ret == -1)
>> +		RTE_LOG(INFO, PMD, "Failed to create pmd_tap for %s\n", name);
>> +
>> +	return ret;
>> +}
> 

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [PATCH v4] drivers/net:new PMD using tun/tap host interface
  2016-10-11 11:49       ` Ferruh Yigit
@ 2016-10-11 21:07         ` Wiles, Keith
  0 siblings, 0 replies; 59+ messages in thread
From: Wiles, Keith @ 2016-10-11 21:07 UTC (permalink / raw)
  To: Yigit, Ferruh; +Cc: dev, pmatilai, yuanhan.liu


Regards,
Keith

> On Oct 11, 2016, at 6:49 AM, Yigit, Ferruh <ferruh.yigit@intel.com> wrote:
> 
> On 10/4/2016 3:45 PM, Keith Wiles wrote:
>> The rte_eth_tap.c PMD creates a device using TUN/TAP interfaces
>> on the local host. The PMD allows for DPDK and the host to
>> communicate using a raw device interface on the host and in
>> the DPDK application. The device created is a Tap device with
>> a L2 packet header.
>> 

Will try to ship out a v5 soon.
>> v4 - merge with latest driver changes
>> v3 - fix includes by removing ifdef for other type besides Linux.
>>     Fix the copyright notice in the Makefile
>> v2 - merge all of the patches into one patch.
>>     Fix a typo on naming the tap device.
>>     Update the maintainers list
>> 
>> Signed-off-by: Keith Wiles <keith.wiles@intel.com>
>> ---
>> MAINTAINERS                             |   5 +
>> config/common_linuxapp                  |   2 +
>> doc/guides/nics/tap.rst                 |  84 ++++
>> drivers/net/Makefile                    |   1 +
>> drivers/net/tap/Makefile                |  57 +++
>> drivers/net/tap/rte_eth_tap.c           | 866 ++++++++++++++++++++++++++++++++
>> drivers/net/tap/rte_pmd_tap_version.map |   4 +
>> mk/rte.app.mk                           |   1 +
>> 8 files changed, 1020 insertions(+)
>> create mode 100644 doc/guides/nics/tap.rst
>> create mode 100644 drivers/net/tap/Makefile
>> create mode 100644 drivers/net/tap/rte_eth_tap.c
>> create mode 100644 drivers/net/tap/rte_pmd_tap_version.map
>> 
> <>
>> diff --git a/config/common_linuxapp b/config/common_linuxapp
>> index 2483dfa..59a2053 100644
>> --- a/config/common_linuxapp
>> +++ b/config/common_linuxapp
>> @@ -44,3 +44,5 @@ CONFIG_RTE_LIBRTE_PMD_VHOST=y
>> CONFIG_RTE_LIBRTE_PMD_AF_PACKET=y
>> CONFIG_RTE_LIBRTE_POWER=y
>> CONFIG_RTE_VIRTIO_USER=y
>> +CONFIG_RTE_LIBRTE_PMD_TAP=y
> 
> According existing config items, a default value of a config option
> should go to config/common_base, and environment specific config file
> overwrites it if required.
> So this option needs to be added into config/common_base too as disabled
> by default.

Add the define to common_base as no, plus a comment for Linux only.

> 
>> +CONFIG_RTE_PMD_TAP_MAX_QUEUES=32

Moved this to the .c file as a define.
> 
> Is the number of max queues really needs to be a config option, I assume
> in normal use case user won't need to update this and will use single
> queue, if that is true what about pushing this into source code to not
> make config file more complex?
> 
>> diff --git a/doc/guides/nics/tap.rst b/doc/guides/nics/tap.rst
> 
> <...>
> 
>> +.. code-block:: console
>> +
>> +   The interfaced name can be changed by adding the iface=foo0
>> +   e.g. --vedv=eth_tap,iface=foo0 --vdev=eth_tap,iface=foo1, ...
> 
> s/vedv/vdev
> eth_tap needs to be net_tap as part of unifying device names work

Fixed.
> 
> <...>
> 
>> diff --git a/drivers/net/Makefile b/drivers/net/Makefile
>> index bc93230..b4afa98 100644
>> --- a/drivers/net/Makefile
>> +++ b/drivers/net/Makefile
>> @@ -55,6 +55,7 @@ DIRS-$(CONFIG_RTE_LIBRTE_THUNDERX_NICVF_PMD) += thunderx
>> DIRS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio
>> DIRS-$(CONFIG_RTE_LIBRTE_VMXNET3_PMD) += vmxnet3
>> DIRS-$(CONFIG_RTE_LIBRTE_PMD_XENVIRT) += xenvirt
>> +DIRS-$(CONFIG_RTE_LIBRTE_PMD_TAP) += tap
> 
> Rest of the PMDs sorted alphabetically, please do same.

Done.
> 
>> 
>> ifeq ($(CONFIG_RTE_LIBRTE_VHOST),y)
>> DIRS-$(CONFIG_RTE_LIBRTE_PMD_VHOST) += vhost
> 
> <...>
> 
>> diff --git a/drivers/net/tap/rte_eth_tap.c b/drivers/net/tap/rte_eth_tap.c
> 
> <...>
> 
>> +
>> +static const char *drivername = "Tap PMD";
>> +static int tap_unit = 0;
> 
> No need to initialize to zero.

Fixed
> 
> <...>
> 
>> +
>> +struct pmd_internals {
>> +	char name[RTE_ETH_NAME_MAX_LEN];	/* Internal Tap device name */
>> +	uint16_t nb_queues;			/* Number of queues supported */
>> +	uint16_t pad0;
> 
> Why this padding? Is it reserved?

Removed pad0. I just like to know about gaps in the structures is the reason.
> 
>> +	struct ether_addr eth_addr;	/* Mac address of the device port */
>> +
>> +	int if_index;			/* IF_INDEX for the port */
>> +	int fds[RTE_PMD_TAP_MAX_QUEUES]; /* List of all file descriptors */
>> +
>> +	struct rx_queue rxq[RTE_PMD_TAP_MAX_QUEUES];	/* List of RX queues */
>> +	struct tx_queue txq[RTE_PMD_TAP_MAX_QUEUES];	/* List of TX queues */
>> +};
>> +
>> +/*
>> + * Tun/Tap allocation routine
>> + *
>> + * name is the number of the interface to use, unless NULL to take the host
>> + * supplied name.
>> + */
>> +static int
>> +tun_alloc(char * name)
> 
> char *name

Fixed.
> 
> <...>
> 
>> +
>> +	/* Always set the fiile descriptor to non-blocking */
> 
> s/fiile/file
> 
>> +	if (fcntl(fd, F_SETFL, O_NONBLOCK) < 0) {
>> +		RTE_LOG(ERR, PMD, "Unable to set to nonblocking\n");
>> +		perror("F_SETFL, NONBLOCK");
>> +		goto error;
>> +	}
>> +
>> +	/* If the name is different that new name as default */
>> +	if (name && strcmp(name, ifr.ifr_name))
>> +		strcpy(name, ifr.ifr_name);
> What about more secure copy?

Changed to be more secure.
> 
>> +
>> +	return fd;
>> +
>> +error:
>> +	if (fd > 0)
>> +		close(fd);
>> +	return -1;
>> +}
>> +
> 
> <...>
> 
>> +
>> +static void
>> +tap_dev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
>> +{
>> +	struct pmd_internals *internals = dev->data->dev_private;
>> +
>> +	dev_info->driver_name = drivername;
>> +	dev_info->if_index = internals->if_index;
>> +	dev_info->max_mac_addrs = 1;
>> +	dev_info->max_rx_pktlen = (uint32_t)ETHER_MAX_VLAN_FRAME_LEN;
>> +	dev_info->max_rx_queues = (uint16_t)internals->nb_queues;
>> +	dev_info->max_tx_queues = (uint16_t)internals->nb_queues;
> casting to uint16_t is not requires, it is already uint16_t.

Removed
> 
>> +	dev_info->min_rx_bufsize = 0;
>> +	dev_info->pci_dev = NULL;
>> +}
>> +
>> +static void
>> +tap_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *igb_stats)
> igb_stats?
> 
>> +{
>> +	unsigned i, imax;
>> +	unsigned long rx_total = 0, tx_total = 0, tx_err_total = 0;
>> +	unsigned long rx_bytes_total = 0, tx_bytes_total = 0;
>> +	const struct pmd_internals *internal = dev->data->dev_private;
>> +
>> +	imax = (internal->nb_queues < RTE_ETHDEV_QUEUE_STAT_CNTRS) ?
>> +		internal->nb_queues : RTE_ETHDEV_QUEUE_STAT_CNTRS;
>> +
>> +	for (i = 0; i < imax; i++) {
>> +		igb_stats->q_ipackets[i] = internal->rxq[i].stats.ipackets;
>> +		igb_stats->q_ibytes[i] = internal->rxq[i].stats.ibytes;
>> +		rx_total += igb_stats->q_ipackets[i];
>> +		rx_bytes_total += igb_stats->q_ibytes[i];
>> +	}
>> +
>> +	imax = (internal->nb_queues < RTE_ETHDEV_QUEUE_STAT_CNTRS) ?
>> +		internal->nb_queues : RTE_ETHDEV_QUEUE_STAT_CNTRS;
> Do we need to duplicate imax calculation?

Removed
> 
> 
>> +
>> +	for (i = 0; i < imax; i++) {
>> +		igb_stats->q_opackets[i] = internal->txq[i].stats.opackets;
>> +		igb_stats->q_errors[i] = internal->txq[i].stats.errs;
>> +		igb_stats->q_obytes[i] = internal->txq[i].stats.obytes;
>> +		tx_total += igb_stats->q_opackets[i];
>> +		tx_err_total += igb_stats->q_errors[i];
>> +		tx_bytes_total += igb_stats->q_obytes[i];
>> +	}
>> +
>> +	igb_stats->ipackets = rx_total;
>> +	igb_stats->ibytes = rx_bytes_total;
>> +	igb_stats->opackets = tx_total;
>> +	igb_stats->oerrors = tx_err_total;
>> +	igb_stats->obytes = tx_bytes_total;
>> +}
>> +
> 
> <...>
> 
>> +
>> +static int
>> +rte_eth_dev_create(const char *name,
>> +		   struct rte_eth_dev **eth_dev,
>> +		   const struct eth_dev_ops *dev_ops,
>> +		   void **internals, size_t internal_size,
>> +		   uint16_t flag)
>> +{
>> +	char buff[RTE_ETH_NAME_MAX_LEN];
>> +	int numa_node = rte_socket_id();
>> +	struct rte_eth_dev *dev = NULL;
>> +	struct rte_eth_dev_data *data = NULL;
>> +	void *priv = NULL;
>> +
>> +	if ((name == NULL) || (eth_dev == NULL) || (dev_ops == NULL) ||
>> +	    (internals == NULL) || (internal_size == 0)) {
>> +		RTE_PMD_DEBUG_TRACE("Paramters are invalid\n");
>> +		return -1;
>> +	}
>> +
>> +	dev = rte_eth_dev_allocate(name);
>> +	if (dev == NULL) {
>> +		RTE_PMD_DEBUG_TRACE("%s: rte_eth_dev_allocate failed for %s\n",
>> +				    name, buff);
>> +		goto error;
>> +	}
>> +
>> +	if (flag & RTE_USE_PRIVATE_DATA) {
> 
> You may need to save this flag value somewhere in internals, to decide
> how to free data later.

Let me look into this one more and see if it is required at all.
> 
>> +		/*
>> +		 * now do all data allocation - for eth_dev structure, dummy
>> +		 * pci driver and internal (private) data
>> +		 */
>> +		snprintf(buff, sizeof(buff), "D-%s-%d", name, numa_node);
>> +		data = rte_zmalloc_socket(buff, sizeof(struct rte_eth_dev_data),
>> +					  0, numa_node);
>> +		if (data == NULL) {
>> +			RTE_PMD_DEBUG_TRACE("%s: Unable to allocate memory\n",
>> +					    name);
>> +			goto error;
>> +		}
>> +		/* move the current state of the structure to the new one */
>> +		rte_memcpy(data, dev->data, sizeof(struct rte_eth_dev_data));
> Why do we need to copy, trying to preserve which data?
> 
>> +		dev->data = data;	/* Override the current data pointer */
>> +	} else
>> +		data = dev->data;
>> +
>> +	snprintf(buff, sizeof(buff), "I-%s-%d", name, numa_node);
>> +	priv = rte_zmalloc_socket(buff, internal_size, 0, numa_node);
>> +	if (priv == NULL) {
>> +		RTE_PMD_DEBUG_TRACE("Unable to allocate internal memory %lu\n",
>> +				    internal_size);
>> +		goto error;
>> +	}
>> +
>> +	/* Setup some default values */
>> +	dev->dev_ops = dev_ops;
>> +	data->dev_private = priv;
> 
>> +	data->port_id = dev->data->port_id;
>> +	memmove(data->name, dev->data->name, strlen(dev->data->name));
> These two assignments are useless, needs to be done before "dev->data =
> data" assignment.

Reworked this code area to remove it.
> 
>> +
>> +	dev->driver = NULL;
>> +	data->dev_flags = RTE_ETH_DEV_DETACHABLE;
>> +	data->kdrv = RTE_KDRV_NONE;
>> +	data->numa_node = numa_node;
>> +
>> +	*eth_dev = dev;
>> +	*internals = priv;
>> +
>> +	return 0;
>> +error:
>> +	rte_free(priv);
>> +
>> +	if (flag & RTE_USE_PRIVATE_DATA)
>> +		rte_free(data);
>> +
>> +	rte_eth_dev_release_port(dev);
>> +
>> +	return -1;
>> +}
>> +
>> +static int
>> +pmd_init_internals(const char *name, struct tap_info *tap,
>> +		   struct pmd_internals **internals,
>> +		   struct rte_eth_dev **eth_dev)
>> +{
>> +	struct rte_eth_dev *dev = NULL;
>> +	struct pmd_internals *internal = NULL;
>> +	struct rte_eth_dev_data *data = NULL;
>> +	int ret, i, fd = -1;
>> +
>> +	RTE_LOG(INFO, PMD,
>> +		"%s: Create TUN/TAP Ethernet device with %d queues on numa %u\n",
>> +		name, RTE_PMD_TAP_MAX_QUEUES, rte_socket_id());
>> +
>> +	pmd_link.link_speed = tap->speed;
>> +
>> +	ret = rte_eth_dev_create(tap->name, &dev, &ops,
>> +				 (void **)&internal, sizeof(struct pmd_internals),
> Why rte_eth_dev_create() get "void **internals" which requires casting,
> but not "struct pmd_internals **internals” ?

Fixed.
> 
>> +				 RTE_USE_PRIVATE_DATA);
>> +	if (ret < 0)
>> +		return -1;
>> +
>> +	strncpy(internal->name, tap->name, sizeof(internal->name));
>> +
>> +	internal->nb_queues = RTE_PMD_TAP_MAX_QUEUES;
>> +
>> +	/* Create the first Tap device */
>> +	if ((fd = tun_alloc(dev->data->name)) < 0) {
>> +		RTE_LOG(ERR, PMD, "tun_alloc(%s) failed\n", dev->data->name);
>> +		rte_free(internal);
> rte_free(dev->data); ?
> But needs to check RTE_USE_PRIVATE_DATA ..

See above
> 
>> +		rte_eth_dev_release_port(dev);
>> +		return -1;
>> +	}
>> +
>> +	/* Presetup the fds to -1 as being not working */
>> +	for(i = 0; i < RTE_PMD_TAP_MAX_QUEUES; i++) {
>> +		internal->fds[i] = -1;
>> +		internal->rxq[i].fd = -1;
>> +		internal->txq[i].fd = -1;
>> +	}
>> +
>> +	/* Take the TUN/TAP fd and place in the first location */
>> +	internal->rxq[0].fd = fd;
>> +	internal->txq[0].fd = fd;
>> +	internal->fds[0] = fd;
>> +
>> +	if (pmd_mac_address(fd, dev, &internal->eth_addr) < 0) {
>> +		rte_free(internal);
> rte_free(dev->data); ?

Yes Added.
> 
>> +		rte_eth_dev_release_port(dev);
>> +		return -1;
>> +	}
>> +
>> +	data = dev->data;
>> +
>> +	data->dev_link = pmd_link;
>> +	data->mac_addrs = &internal->eth_addr;
>> +
>> +	data->nb_rx_queues = (uint16_t)internal->nb_queues;
>> +	data->nb_tx_queues = (uint16_t)internal->nb_queues;
> no cast required.

Removed
> 
>> +	data->drv_name = drivername;
>> +
>> +	*eth_dev = dev;
>> +	*internals = internal;
>> +
>> +	return 0;
>> +}
>> +
> 
> <...>
> 
>> +
>> +static int
>> +set_interface_speed(const char *key __rte_unused,
>> +		    const char *value,
>> +		    void *extra_args __rte_unused)
> need to drop  __rte_unused for extra_args
> 
>> +{
>> +	struct tap_info *tap = (struct tap_info *)extra_args;
>> +
>> +	pmd_link.link_speed = (value) ? atoi(value) : ETH_SPEED_NUM_10G;
>> +	tap->speed = pmd_link.link_speed;
>> +
>> +	return 0;
>> +}
>> +
>> +/*
>> + * Open a TAP interface device.
>> + */
>> +static int
>> +rte_pmd_tap_devinit(const char *name, const char *params)
>> +{
>> +	int ret = 0;
>> +	struct rte_kvargs *kvlist;
>> +	struct tap_info tap_info;
>> +
>> +	/* Setup default values */
>> +	memset(&tap_info, 0, sizeof(tap_info));
>> +
>> +	tap_info.speed = ETH_SPEED_NUM_10G;
>> +	snprintf(tap_info.name, sizeof(tap_info.name), "dtap%d", tap_unit++);
> What about extracting iface name "dtap" into a macro to make it more
> visible.

Created macro for the default name.
> 
>> +
>> +	if ((params == NULL) || (params[0] == '\0')) {
>> +		RTE_LOG(INFO, PMD, "Initializing pmd_tap for %s\n", name);
>> +
>> +		ret = eth_dev_tap_create(name, &tap_info);
> This "name" is not used at all (except from RTE_LOG), instead tap->name
> is used for interface name, so why carying this variable around?

Fixed and changed the API.
> 
>> +		goto leave;
>> +	}
>> +
>> +	RTE_LOG(INFO, PMD, "Initialize %s with params (%s)\n", name, params);
>> +
>> +	kvlist = rte_kvargs_parse(params, valid_arguments);
>> +	if (!kvlist) {
>> +		ret = eth_dev_tap_create(name, &tap_info);
>> +		goto leave;
>> +	}
>> +
>> +	if (rte_kvargs_count(kvlist, ETH_TAP_SPEED_ARG) == 1) {
>> +		ret = rte_kvargs_process(kvlist, ETH_TAP_SPEED_ARG,
>> +					 &set_interface_speed, &tap_info);
>> +		if (ret < 0)
>> +			goto leave;
>> +	} else
>> +		set_interface_speed(NULL, NULL, &tap_info);
> This call is redundant, tap_info already has default speed value set.

Removed
> 
>> +
>> +	if (rte_kvargs_count(kvlist, ETH_TAP_IFACE_ARG) == 1) {
>> +		ret = rte_kvargs_process(kvlist, ETH_TAP_IFACE_ARG,
>> +					 &set_interface_name, &tap_info);
>> +		if (ret < 0)
>> +			goto leave;
>> +	} else
>> +		set_interface_name(NULL, NULL, (void *)&tap_info);
> tap_info->name already set to default value (dtap%d), this call is not
> required.

Removed
> 
>> +
>> +	rte_kvargs_free(kvlist);
>> +
>> +leave:
>> +	if (ret == -1)
>> +		RTE_LOG(INFO, PMD, "Failed to create pmd_tap for %s\n", name);
>> +
>> +	return ret;
>> +}
>> +
>> +/*
>> + * detach a TAP device.
>> + */
>> +static int
>> +rte_pmd_tap_devuninit(const char *name)
>> +{
>> +	struct rte_eth_dev *eth_dev = NULL;
>> +	struct pmd_internals *internals;
>> +	int i;
>> +
>> +	RTE_LOG(INFO, PMD, "Closing TUN/TAP Ethernet device on numa %u\n",
>> +		rte_socket_id());
>> +
>> +	if (name == NULL)
> This check is redundant, eal layer won't call this function with "name
> == NULL”

Removed
> 
>> +		return 0;
>> +
>> +	/* find the ethdev entry */
>> +	eth_dev = rte_eth_dev_allocated(name);
>> +	if (eth_dev == NULL)
>> +		return 0;
>> +
>> +	internals = eth_dev->data->dev_private;
>> +	for (i = 0; i < internals->nb_queues; i++)
>> +		if (internals->fds[i] != -1)
>> +			close(internals->fds[i]);
>> +
>> +	rte_free(eth_dev->data->dev_private);
>> +	rte_free(eth_dev->data);
> data can be shared?
> Don't we need a RTE_USE_PRIVATE_DATA flag check?
> 
>> +
>> +	rte_eth_dev_release_port(eth_dev);
>> +
>> +	return 0;
>> +}
>> +
>> +static struct rte_vdev_driver pmd_tap_drv = {
>> +	.init = rte_pmd_tap_devinit,
>> +	.uninit = rte_pmd_tap_devuninit,
>> +};
>> +
>> +DRIVER_REGISTER_VDEV(eth_tap, pmd_tap_drv);
> name convention is now: “net_tap"

Fixed
> 
>> +DRIVER_REGISTER_PARAM_STRING(eth_tap,
>> +			     "iface=<string>,speed=N");
>> diff --git a/drivers/net/tap/rte_pmd_tap_version.map b/drivers/net/tap/rte_pmd_tap_version.map
>> new file mode 100644
>> index 0000000..61463bf
>> --- /dev/null
>> +++ b/drivers/net/tap/rte_pmd_tap_version.map
>> @@ -0,0 +1,4 @@
>> +DPDK_16.11 {
>> +
>> +	local: *;
>> +};
>> diff --git a/mk/rte.app.mk b/mk/rte.app.mk
>> index 1a0095b..bd1d10f 100644
>> --- a/mk/rte.app.mk
>> +++ b/mk/rte.app.mk
>> @@ -129,6 +129,7 @@ ifeq ($(CONFIG_RTE_LIBRTE_VHOST),y)
>> _LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_VHOST)      += -lrte_pmd_vhost
>> endif # $(CONFIG_RTE_LIBRTE_VHOST)
>> _LDLIBS-$(CONFIG_RTE_LIBRTE_VMXNET3_PMD)    += -lrte_pmd_vmxnet3_uio
>> +_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_TAP)        += -lrte_pmd_tap
> please put in alphebetical order

Done
> 
>> 
>> ifeq ($(CONFIG_RTE_LIBRTE_CRYPTODEV),y)
>> _LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_AESNI_MB)   += -lrte_pmd_aesni_mb



^ permalink raw reply	[flat|nested] 59+ messages in thread

* [PATCH v5] drivers/net:new PMD using tun/tap host interface
  2016-10-04 14:45     ` [PATCH v4] " Keith Wiles
                         ` (3 preceding siblings ...)
  2016-10-11 12:28       ` Ferruh Yigit
@ 2016-10-11 21:51       ` Keith Wiles
  2016-10-12 14:56         ` Ferruh Yigit
  4 siblings, 1 reply; 59+ messages in thread
From: Keith Wiles @ 2016-10-11 21:51 UTC (permalink / raw)
  To: dev; +Cc: pmatilai, yuanhan.liu, ferruh.yigit

The rte_eth_tap.c PMD creates a device using TUN/TAP interfaces
on the local host. The PMD allows for DPDK and the host to
communicate using a raw device interface on the host and in
the DPDK application. The device created is a Tap device with
a L2 packet header.

v5 - merge in changes from list review see related emails.
     fixed checkpatch issues and many minor edits
v4 - merge with latest driver changes
v3 - fix includes by removing ifdef for other type besides Linux.
     Fix the copyright notice in the Makefile
v2 - merge all of the patches into one patch.
     Fix a typo on naming the tap device.
     Update the maintainers list

Signed-off-by: Keith Wiles <keith.wiles@intel.com>
---
 MAINTAINERS                             |   5 +
 config/common_base                      |   5 +
 config/common_linuxapp                  |   1 +
 doc/guides/nics/tap.rst                 | 138 ++++++
 drivers/net/Makefile                    |   1 +
 drivers/net/tap/Makefile                |  57 +++
 drivers/net/tap/rte_eth_tap.c           | 767 ++++++++++++++++++++++++++++++++
 drivers/net/tap/rte_pmd_tap_version.map |   4 +
 mk/rte.app.mk                           |   1 +
 9 files changed, 979 insertions(+)
 create mode 100644 doc/guides/nics/tap.rst
 create mode 100644 drivers/net/tap/Makefile
 create mode 100644 drivers/net/tap/rte_eth_tap.c
 create mode 100644 drivers/net/tap/rte_pmd_tap_version.map

diff --git a/MAINTAINERS b/MAINTAINERS
index cd8d167..f905709 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -394,6 +394,11 @@ F: doc/guides/nics/pcap_ring.rst
 F: app/test/test_pmd_ring.c
 F: app/test/test_pmd_ring_perf.c
 
+Tap PMD
+M: Keith Wiles <keith.wiles@intel.com>
+F: drivers/net/tap
+F: doc/guides/nics/tap.rst
+
 Null Networking PMD
 M: Tetsuya Mukawa <mtetsuyah@gmail.com>
 F: drivers/net/null/
diff --git a/config/common_base b/config/common_base
index f5d2eff..356c631 100644
--- a/config/common_base
+++ b/config/common_base
@@ -592,3 +592,8 @@ CONFIG_RTE_APP_TEST_RESOURCE_TAR=n
 CONFIG_RTE_TEST_PMD=y
 CONFIG_RTE_TEST_PMD_RECORD_CORE_CYCLES=n
 CONFIG_RTE_TEST_PMD_RECORD_BURST_STATS=n
+
+#
+# Set TAP PMD to 'n' as it is only supported in Linux for now.
+#
+CONFIG_RTE_LIBRTE_PMD_TAP=n
diff --git a/config/common_linuxapp b/config/common_linuxapp
index 2483dfa..782b503 100644
--- a/config/common_linuxapp
+++ b/config/common_linuxapp
@@ -44,3 +44,4 @@ CONFIG_RTE_LIBRTE_PMD_VHOST=y
 CONFIG_RTE_LIBRTE_PMD_AF_PACKET=y
 CONFIG_RTE_LIBRTE_POWER=y
 CONFIG_RTE_VIRTIO_USER=y
+CONFIG_RTE_LIBRTE_PMD_TAP=y
diff --git a/doc/guides/nics/tap.rst b/doc/guides/nics/tap.rst
new file mode 100644
index 0000000..eed81ec
--- /dev/null
+++ b/doc/guides/nics/tap.rst
@@ -0,0 +1,138 @@
+..  BSD LICENSE
+    Copyright(c) 2016 Intel Corporation. All rights reserved.
+    All rights reserved.
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions
+    are met:
+
+    * Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+    notice, this list of conditions and the following disclaimer in
+    the documentation and/or other materials provided with the
+    distribution.
+    * Neither the name of Intel Corporation nor the names of its
+    contributors may be used to endorse or promote products derived
+    from this software without specific prior written permission.
+
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+Tun/Tap Poll Mode Driver
+========================================
+
+The rte_eth_tap.c PMD creates a device using TUN/TAP interfaces on the local
+host. The PMD allows for DPDK and the host to communicate using a raw device
+interface on the host and in the DPDK application.
+
+The device created is a TAP device, which sends/receives packets in a raw
+format with an L2 header. The TAP PMD is used for connectivity to the local host
+using a TAP interface. When the TAP PMD is initialized it will create a number
+of tap devices in the host, accessible via the 'ifconfig -a' or 'ip' command.
+These commands can be used to configure and query the virtual device.
+
+These TAP interfaces can be used with wireshark, tcpdump or Pktgen-DPDK, and
+can also serve as a network connection to the DPDK application. The method to
+enable one or more interfaces is to use the --vdev=net_tap option on the
+DPDK application command line. Each --vdev=net_tap option given will create an
+interface named dtap0, dtap1, ... and so forth.
+
+.. code-block:: console
+
+   The interface name can be changed by adding the iface=foo0 option,
+   e.g. --vdev=net_tap,iface=foo0 --vdev=net_tap,iface=foo1, ...
+
+.. code-block:: console
+
+   Also the speed of the interface can be changed from 10G to whatever number
+   needed, but the interface does not enforce that speed.
+   e.g. --vdev=net_tap,iface=foo0,speed=25000
+
+After the DPDK application is started you can send and receive packets on the
+interface using the standard rx_burst/tx_burst APIs in DPDK. From the host point
+of view you can use any host tool like tcpdump, wireshark, ping, Pktgen and
+others to communicate with the DPDK application. The DPDK application may not
+understand network protocols like IPv4/6, UDP or TCP unless the application has
+been written to understand these protocols.
+
+If you need the interface to act as a real network interface, meaning it is up
+and has a valid IP address, then you can do this with the following commands:
+
+.. code-block:: console
+
+   sudo ip link set dtap0 up; sudo ip addr add 192.168.0.250/24 dev dtap0
+   sudo ip link set dtap1 up; sudo ip addr add 192.168.1.250/24 dev dtap1
+
+Please change the IP addresses as you see fit.
+
+If routing is enabled on the host you can also communicate with the DPDK App
+over the internet via a standard socket layer application as long as you account
+for the protocol handling in the application.
+
+If you have a Network Stack in your DPDK application or something like it you
+can utilize that stack to handle the network protocols. Plus you would be able
+to address the interface using an IP address assigned to the internal interface.
+
+For a very crude test you can do the following:
+
+Apply the patch below and make sure you have socat installed on your system.
+
+Build DPDK, then pull down Pktgen and build pktgen using the DPDK SDK/Target
+used to build the dpdk you pulled down.
+
+Run pktgen from the pktgen repo directory in an xterm:
+    Note: change the -b options to blacklist all of your physical ports. The
+          following command line is all one line.
+
+.. code-block:: console
+
+    sudo ./app/app/x86_64-native-linuxapp-gcc/app/pktgen -l 1-5 -n 4        \
+     --proc-type auto --log-level 8 --socket-mem 512,512 --file-prefix pg   \
+     --vdev=net_tap --vdev=net_tap -b 05:00.0 -b 05:00.1                    \
+     -b 04:00.0 -b 04:00.1 -b 04:00.2 -b 04:00.3                            \
+     -b 81:00.0 -b 81:00.1 -b 81:00.2 -b 81:00.3                            \
+      -b 82:00.0 -b 83:00.0 -- -T -P -m [2:3].0 -m [4:5].1                  \
+     -f themes/black-yellow.theme
+
+I normally put the line above into a file called doit.sh, just to allow for a
+simple execution of the line above.
+
+You can leave the -f themes/black-yellow.theme off if the colors do not work
+for your system configuration.
+
+Verify with the 'ifconfig -a' command in a different xterm window; you should
+see that the dtap0 and dtap1 interfaces have been created.
+
+Next set the links for the two interfaces to up via the commands below.
+
+.. code-block:: console
+
+    sudo ip link set dtap0 up; sudo ip addr add 192.168.0.250/24 dev dtap0
+    sudo ip link set dtap1 up; sudo ip addr add 192.168.1.250/24 dev dtap1
+
+Then use socat to create a loopback for the two interfaces.
+
+.. code-block:: console
+
+    sudo socat interface:dtap0 interface:dtap1
+
+Then on the Pktgen command line interface you can start sending packets using
+the commands 'start 0' and 'start 1' or you can start both at the same time
+with 'start all'. The command 'str' is an alias for 'start all' and 'stp' is
+an alias for 'stop all'.
+
+While running you should see the 64 byte counters increasing to verify the
+traffic is being looped back. You can use 'set all size XXX' to change the
+size of the packets after you stop the traffic. Use the pktgen 'help' command
+to see a list of all commands. You can also use the '-f' option to load commands
+at startup.
diff --git a/drivers/net/Makefile b/drivers/net/Makefile
index bc93230..e366a85 100644
--- a/drivers/net/Makefile
+++ b/drivers/net/Makefile
@@ -51,6 +51,7 @@ DIRS-$(CONFIG_RTE_LIBRTE_PMD_PCAP) += pcap
 DIRS-$(CONFIG_RTE_LIBRTE_QEDE_PMD) += qede
 DIRS-$(CONFIG_RTE_LIBRTE_PMD_RING) += ring
 DIRS-$(CONFIG_RTE_LIBRTE_PMD_SZEDATA2) += szedata2
+DIRS-$(CONFIG_RTE_LIBRTE_PMD_TAP) += tap
 DIRS-$(CONFIG_RTE_LIBRTE_THUNDERX_NICVF_PMD) += thunderx
 DIRS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio
 DIRS-$(CONFIG_RTE_LIBRTE_VMXNET3_PMD) += vmxnet3
diff --git a/drivers/net/tap/Makefile b/drivers/net/tap/Makefile
new file mode 100644
index 0000000..e18f30c
--- /dev/null
+++ b/drivers/net/tap/Makefile
@@ -0,0 +1,57 @@
+#   BSD LICENSE
+#
+#   Copyright(c) 2016 Intel Corporation. All rights reserved.
+#
+#   Redistribution and use in source and binary forms, with or without
+#   modification, are permitted provided that the following conditions
+#   are met:
+#
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice, this list of conditions and the following disclaimer in
+#       the documentation and/or other materials provided with the
+#       distribution.
+#     * Neither the name of Intel Corporation nor the names of its
+#       contributors may be used to endorse or promote products derived
+#       from this software without specific prior written permission.
+#
+#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+#   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+#   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+#   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+#   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+#
+# library name
+#
+LIB = librte_pmd_tap.a
+
+EXPORT_MAP := rte_pmd_tap_version.map
+
+LIBABIVER := 1
+
+CFLAGS += -O3
+CFLAGS += $(WERROR_FLAGS)
+
+#
+# all source are stored in SRCS-y
+#
+SRCS-$(CONFIG_RTE_LIBRTE_PMD_TAP) += rte_eth_tap.c
+
+# this lib depends upon:
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_TAP) += lib/librte_eal
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_TAP) += lib/librte_mbuf
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_TAP) += lib/librte_mempool
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_TAP) += lib/librte_ether
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_TAP) += lib/librte_kvargs
+
+include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/drivers/net/tap/rte_eth_tap.c b/drivers/net/tap/rte_eth_tap.c
new file mode 100644
index 0000000..c13aa1b
--- /dev/null
+++ b/drivers/net/tap/rte_eth_tap.c
@@ -0,0 +1,767 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2016 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <rte_mbuf.h>
+#include <rte_ethdev.h>
+#include <rte_malloc.h>
+#include <rte_vdev.h>
+#include <rte_kvargs.h>
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/socket.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <unistd.h>
+#include <poll.h>
+#include <arpa/inet.h>
+#include <linux/if.h>
+#include <linux/if_tun.h>
+#include <linux/if_ether.h>
+#include <fcntl.h>
+
+/* Linux based path to the TUN device */
+#define TUN_TAP_DEV_PATH        "/dev/net/tun"
+#define DEFAULT_TAP_NAME        "dtap"
+
+#define ETH_TAP_IFACE_ARG       "iface"
+#define ETH_TAP_SPEED_ARG       "speed"
+
+#define RTE_PMD_TAP_MAX_QUEUES	32
+
+static const char *valid_arguments[] = {	/* kvargs keys handed to rte_kvargs_parse() in probe */
+	ETH_TAP_IFACE_ARG,
+	ETH_TAP_SPEED_ARG,
+	NULL	/* end of list */
+};
+
+static const char *drivername = "Tap PMD";	/* reported through dev_info and data->drv_name */
+static int tap_unit;	/* next unit number appended to DEFAULT_TAP_NAME by probe */
+
+static struct rte_eth_link pmd_link = {	/* template copied into data->dev_link at create time */
+	.link_speed = ETH_SPEED_NUM_10G,	/* replaced by probe with the parsed "speed" value */
+	.link_duplex = ETH_LINK_FULL_DUPLEX,
+	.link_status = ETH_LINK_DOWN,	/* tap_dev_start() switches the port's copy to UP */
+	.link_autoneg = ETH_LINK_SPEED_AUTONEG
+};
+
+struct tap_info {	/* NOTE(review): not referenced anywhere else in this file; candidate for removal */
+	char name[RTE_ETH_NAME_MAX_LEN]; /* Interface name supplied/given */
+	int speed;			 /* Speed of interface */
+};
+
+struct pkt_stats {	/* per-queue counters: RX bursts update i*, TX bursts update o* and errs */
+	uint64_t opackets;		/* Number of output packets */
+	uint64_t ipackets;		/* Number of input packets */
+	uint64_t obytes;		/* Number of bytes on output */
+	uint64_t ibytes;		/* Number of bytes on input */
+	uint64_t errs;			/* Packets of a TX burst that were not written */
+};
+
+struct rx_queue {
+	struct rte_mempool *mp;		/* Mempool for RX packets */
+	uint16_t in_port;		/* Port ID stored in mbuf->port on receive */
+	int fd;				/* TAP descriptor read by pmd_rx_burst(); -1 when unset */
+
+	struct pkt_stats stats;		/* Stats for this RX queue */
+};
+
+struct tx_queue {
+	int fd;				/* TAP descriptor written by pmd_tx_burst(); -1 when unset */
+	struct pkt_stats stats;		/* Stats for this TX queue */
+};
+
+struct pmd_internals {
+	char name[RTE_ETH_NAME_MAX_LEN];	/* Internal Tap device name */
+	uint16_t nb_queues;		/* Number of queues supported */
+	struct ether_addr eth_addr;	/* Mac address of the device port */
+
+	int if_index;			/* IF_INDEX for the port; NOTE(review): never assigned in this file */
+	int fds[RTE_PMD_TAP_MAX_QUEUES]; /* Descriptors closed on stop/remove; -1 marks an unused slot */
+
+	struct rx_queue rxq[RTE_PMD_TAP_MAX_QUEUES];	/* List of RX queues */
+	struct tx_queue txq[RTE_PMD_TAP_MAX_QUEUES];	/* List of TX queues */
+};
+
+/*
+ * Tun/Tap allocation routine
+ *
+ * Opens TUN_TAP_DEV_PATH, attaches the descriptor to a TAP interface
+ * (IFF_TAP | IFF_NO_PI) with TUNSETIFF and switches it to non-blocking mode.
+ *
+ * name is the name of the interface to use; when it is NULL or empty the
+ * host supplied name is taken. If name is non-NULL and the interface name
+ * reported back by TUNSETIFF differs from it, that name is copied into name,
+ * so the buffer must be able to hold RTE_ETH_NAME_MAX_LEN - 1 bytes.
+ *
+ * Returns the open file descriptor on success and -1 on any failure; the
+ * descriptor is closed before an error return.
+ */
+static int
+tun_alloc(char *name)
+{
+	struct ifreq ifr;
+	unsigned int features;
+	int fd;
+
+	memset(&ifr, 0, sizeof(struct ifreq));
+
+	ifr.ifr_flags = IFF_TAP | IFF_NO_PI;
+	/*
+	 * snprintf() always NUL-terminates, unlike strncpy() with a name of
+	 * IFNAMSIZ or more characters; ifr_name is passed to strcmp() below.
+	 */
+	if (name && name[0])
+		snprintf(ifr.ifr_name, sizeof(ifr.ifr_name), "%s", name);
+
+	fd = open(TUN_TAP_DEV_PATH, O_RDWR);
+	if (fd < 0) {
+		RTE_LOG(ERR, PMD, "Unable to create TAP interface\n");
+		goto error;
+	}
+
+	/* Grab the TUN features to verify we can work */
+	if (ioctl(fd, TUNGETFEATURES, &features) < 0) {
+		RTE_LOG(ERR, PMD, "Unable to get TUN/TAP features\n");
+		goto error;
+	}
+	RTE_LOG(DEBUG, PMD, "TUN/TAP Features %08x\n", features);
+
+	if (!(features & IFF_MULTI_QUEUE) && (RTE_PMD_TAP_MAX_QUEUES > 1)) {
+		/* the PMD is built for several queues but the host has one */
+		RTE_LOG(DEBUG, PMD, "TUN/TAP device only one queue\n");
+		goto error;
+	} else if ((features & IFF_ONE_QUEUE) &&
+			(RTE_PMD_TAP_MAX_QUEUES == 1)) {
+		ifr.ifr_flags |= IFF_ONE_QUEUE;
+		RTE_LOG(DEBUG, PMD, "Single queue only support\n");
+	} else {
+		ifr.ifr_flags |= IFF_MULTI_QUEUE;
+		RTE_LOG(DEBUG, PMD, "Multi-queue support for %d queues\n",
+			RTE_PMD_TAP_MAX_QUEUES);
+	}
+
+	/* Set the TUN/TAP configuration and get the name if needed */
+	if (ioctl(fd, TUNSETIFF, (void *)&ifr) < 0) {
+		RTE_LOG(ERR, PMD, "Unable to set TUNSETIFF for %s\n",
+			ifr.ifr_name);
+		perror("TUNSETIFF");
+		goto error;
+	}
+
+	/* Always set the file descriptor to non-blocking */
+	if (fcntl(fd, F_SETFL, O_NONBLOCK) < 0) {
+		RTE_LOG(ERR, PMD, "Unable to set to nonblocking\n");
+		perror("F_SETFL, NONBLOCK");
+		goto error;
+	}
+
+	/* If the reported name differs, hand it back to the caller */
+	if (name && strcmp(name, ifr.ifr_name))
+		snprintf(name, RTE_ETH_NAME_MAX_LEN - 1, "%s", ifr.ifr_name);
+
+	return fd;
+
+error:
+	/* 0 is a valid descriptor; only a failed open() leaves nothing to close */
+	if (fd >= 0)
+		close(fd);
+	return -1;
+}
+
+/*
+ * RX burst callback: store up to nb_pkts received frames in bufs.
+ *
+ * Each frame is read from the queue's file descriptor into a freshly
+ * allocated mbuf. The burst ends early when no mbuf can be allocated or
+ * when read() returns <= 0. Returns the number of mbufs placed in bufs.
+ */
+static uint16_t
+pmd_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
+{
+	struct rx_queue *rxq = queue;
+	unsigned long rx_bytes = 0;
+	uint16_t count = 0;
+
+	while (count < nb_pkts) {
+		struct rte_mbuf *m = rte_pktmbuf_alloc(rxq->mp);
+		int nread;
+
+		if (unlikely(m == NULL)) {
+			RTE_LOG(WARNING, PMD, "Unable to allocate mbuf\n");
+			break;
+		}
+
+		/* read at most what fits in this single mbuf */
+		nread = read(rxq->fd, rte_pktmbuf_mtod(m, char *),
+			     rte_pktmbuf_tailroom(m));
+		if (nread <= 0) {
+			/* nothing was stored: give the mbuf back */
+			rte_pktmbuf_free(m);
+			break;
+		}
+
+		m->port = rxq->in_port;
+		m->pkt_len = nread;
+		m->data_len = nread;
+
+		rx_bytes += m->pkt_len;
+		bufs[count++] = m;
+	}
+
+	rxq->stats.ipackets += count;
+	rxq->stats.ibytes += rx_bytes;
+
+	return count;
+}
+
+/*
+ * TX burst callback: write up to nb_pkts frames to the queue's descriptor.
+ *
+ * Makes at most nb_pkts attempts. Each attempt polls the descriptor with a
+ * zero timeout and, when it is writable, writes the next unsent mbuf and
+ * frees it. The burst stops when poll() or write() returns <= 0. Packets
+ * that were not written are added to the queue's errs counter and stay
+ * owned by the caller. Returns the number of packets written.
+ *
+ * NOTE(review): write() is given pkt_len bytes starting at the first
+ * segment's data; this presumably relies on single-segment mbufs - confirm.
+ */
+static uint16_t
+pmd_tx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
+{
+	struct tx_queue *txq = queue;
+	struct pollfd wr;
+	unsigned long tx_bytes = 0;
+	uint16_t sent = 0;
+	int attempt;
+
+	if (unlikely(nb_pkts == 0))
+		return 0;
+
+	wr.fd = txq->fd;
+	wr.events = POLLOUT;
+
+	for (attempt = 0; attempt < nb_pkts; attempt++) {
+		struct rte_mbuf *m;
+
+		if (poll(&wr, 1, 0) <= 0)
+			break;
+
+		/* an event other than POLLOUT uses up this attempt */
+		if (!(wr.revents & POLLOUT))
+			continue;
+
+		/* copy the tx frame data */
+		m = bufs[sent];
+		if (write(wr.fd, rte_pktmbuf_mtod(m, void*),
+			  rte_pktmbuf_pkt_len(m)) <= 0)
+			break;
+
+		/* read the length before the mbuf goes back to its pool */
+		tx_bytes += m->pkt_len;
+		sent++;
+		rte_pktmbuf_free(m);
+	}
+
+	txq->stats.opackets += sent;
+	txq->stats.errs += nb_pkts - sent;
+	txq->stats.obytes += tx_bytes;
+
+	return sent;
+}
+
+/*
+ * Start callback: only marks the link of the port as up. Always returns 0.
+ */
+static int
+tap_dev_start(struct rte_eth_dev *dev)
+{
+	struct rte_eth_link *link = &dev->data->dev_link;
+
+	/* Force the Link up */
+	link->link_status = ETH_LINK_UP;
+
+	return 0;
+}
+
+/*
+ * This function gets called when the current port gets stopped.
+ *
+ * Closes every descriptor recorded in internals->fds[] and marks the link
+ * down. Each closed slot and the rx/tx queue entries of the same index are
+ * reset to -1, so the descriptor is not closed a second time by a later
+ * stop, queue release or device removal, and tap_setup_queue() sees that
+ * the queue needs a new descriptor.
+ */
+static void
+tap_dev_stop(struct rte_eth_dev *dev)
+{
+	int i;
+	struct pmd_internals *internals = dev->data->dev_private;
+
+	for (i = 0; i < internals->nb_queues; i++) {
+		if (internals->fds[i] == -1)
+			continue;
+
+		close(internals->fds[i]);
+
+		/* forget the descriptor everywhere it was recorded */
+		internals->fds[i] = -1;
+		internals->rxq[i].fd = -1;
+		internals->txq[i].fd = -1;
+	}
+
+	dev->data->dev_link.link_status = ETH_LINK_DOWN;
+}
+
+static int
+tap_dev_configure(struct rte_eth_dev *dev __rte_unused)
+{
+	return 0;	/* nothing is configured here; always reports success */
+}
+
+/*
+ * dev_infos_get callback: fill dev_info with the fixed limits of a TAP
+ * port - one MAC address, frames up to ETHER_MAX_VLAN_FRAME_LEN and as
+ * many RX/TX queues as the PMD was created with. No PCI device is set.
+ */
+static void
+tap_dev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
+{
+	const struct pmd_internals *pmd = dev->data->dev_private;
+	const uint16_t nb_queues = pmd->nb_queues;
+
+	dev_info->pci_dev = NULL;
+	dev_info->driver_name = drivername;
+	dev_info->if_index = pmd->if_index;
+
+	dev_info->max_mac_addrs = 1;
+	dev_info->min_rx_bufsize = 0;
+	dev_info->max_rx_pktlen = (uint32_t)ETHER_MAX_VLAN_FRAME_LEN;
+
+	dev_info->max_rx_queues = nb_queues;
+	dev_info->max_tx_queues = nb_queues;
+}
+
+/*
+ * stats_get callback: copy the per-queue counters into tap_stats and
+ * compute the port totals.
+ *
+ * The per-queue arrays of tap_stats are only filled for the first
+ * RTE_ETHDEV_QUEUE_STAT_CNTRS queues, but the port totals are summed over
+ * every queue so traffic on the remaining queues is still counted.
+ */
+static void
+tap_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *tap_stats)
+{
+	unsigned int i;
+	uint64_t rx_total = 0, tx_total = 0, tx_err_total = 0;
+	uint64_t rx_bytes_total = 0, tx_bytes_total = 0;
+	const struct pmd_internals *pmd = dev->data->dev_private;
+
+	for (i = 0; i < pmd->nb_queues; i++) {
+		const struct pkt_stats *rx = &pmd->rxq[i].stats;
+		const struct pkt_stats *tx = &pmd->txq[i].stats;
+
+		/* only this many per-queue slots are written */
+		if (i < RTE_ETHDEV_QUEUE_STAT_CNTRS) {
+			tap_stats->q_ipackets[i] = rx->ipackets;
+			tap_stats->q_ibytes[i] = rx->ibytes;
+			tap_stats->q_opackets[i] = tx->opackets;
+			tap_stats->q_errors[i] = tx->errs;
+			tap_stats->q_obytes[i] = tx->obytes;
+		}
+
+		rx_total += rx->ipackets;
+		rx_bytes_total += rx->ibytes;
+		tx_total += tx->opackets;
+		tx_err_total += tx->errs;
+		tx_bytes_total += tx->obytes;
+	}
+
+	tap_stats->ipackets = rx_total;
+	tap_stats->ibytes = rx_bytes_total;
+	tap_stats->opackets = tx_total;
+	tap_stats->oerrors = tx_err_total;
+	tap_stats->obytes = tx_bytes_total;
+}
+
+/*
+ * stats_reset callback: zero the RX packet/byte counters and the TX
+ * packet/byte/error counters of every queue.
+ */
+static void
+tap_stats_reset(struct rte_eth_dev *dev)
+{
+	struct pmd_internals *pmd = dev->data->dev_private;
+	int q;
+
+	for (q = 0; q < pmd->nb_queues; q++) {
+		struct pkt_stats *rx = &pmd->rxq[q].stats;
+		struct pkt_stats *tx = &pmd->txq[q].stats;
+
+		rx->ipackets = 0;
+		rx->ibytes = 0;
+
+		tx->opackets = 0;
+		tx->errs = 0;
+		tx->obytes = 0;
+	}
+}
+
+static void
+tap_dev_close(struct rte_eth_dev *dev __rte_unused)	/* dev_close callback: does nothing */
+{
+}
+
+/*
+ * rx_queue_release callback: close the queue's descriptor when one is
+ * recorded (fd > 0) and mark the queue as having none.
+ *
+ * NOTE(review): tap_setup_queue() stores the same descriptor in the TX
+ * queue of the same index; that entry is not cleared here.
+ */
+static void
+tap_rx_queue_release(void *queue)
+{
+	struct rx_queue *rxq = queue;
+
+	if (rxq == NULL || rxq->fd <= 0)
+		return;
+
+	close(rxq->fd);
+	rxq->fd = -1;
+}
+
+/*
+ * tx_queue_release callback: close the queue's descriptor when one is
+ * recorded (fd > 0) and mark the queue as having none.
+ *
+ * NOTE(review): tap_setup_queue() stores the same descriptor in the RX
+ * queue of the same index; that entry is not cleared here.
+ */
+static void
+tap_tx_queue_release(void *queue)
+{
+	struct tx_queue *txq = queue;
+
+	if (txq == NULL || txq->fd <= 0)
+		return;
+
+	close(txq->fd);
+	txq->fd = -1;
+}
+
+static int
+tap_link_update(struct rte_eth_dev *dev __rte_unused,
+		int wait_to_complete __rte_unused)
+{
+	return 0;	/* link state is not re-read here; always returns 0 */
+}
+
+/*
+ * Bind queue pair qid of the port to a TAP descriptor.
+ *
+ * Reuses the descriptor already stored in the RX queue, else the one in
+ * the TX queue, and only calls tun_alloc() when neither holds one. The
+ * descriptor is then stored in both queues and both are published in
+ * dev->data. Returns the descriptor, or -1 when tun_alloc() fails.
+ */
+static int
+tap_setup_queue(struct rte_eth_dev *dev,
+		struct pmd_internals *internals,
+		uint16_t qid)
+{
+	struct rx_queue *rx = &internals->rxq[qid];
+	struct tx_queue *tx = &internals->txq[qid];
+	int fd = rx->fd;
+
+	if (fd < 0)
+		fd = tx->fd;
+
+	if (fd < 0) {
+		RTE_LOG(INFO, PMD, "Add queue to TAP %s for qid %d\n",
+			dev->data->name, qid);
+
+		fd = tun_alloc(dev->data->name);
+		if (fd < 0) {
+			RTE_LOG(ERR, PMD, "tun_alloc(%s) failed\n",
+				dev->data->name);
+			return -1;
+		}
+	}
+
+	dev->data->rx_queues[qid] = rx;
+	dev->data->tx_queues[qid] = tx;
+
+	rx->fd = fd;
+	tx->fd = fd;
+
+	return fd;
+}
+
+/*
+ * rx_queue_setup callback.
+ *
+ * Records the mempool and port id for the queue, checks that an mbuf of
+ * the pool can hold ETH_FRAME_LEN bytes after the headroom, and binds the
+ * queue to a TAP descriptor through tap_setup_queue().
+ *
+ * Returns 0 on success, -1 for a bad queue id, a NULL mempool or a failed
+ * descriptor setup, and -ENOMEM when the mbuf data room is too small.
+ */
+static int
+tap_rx_queue_setup(struct rte_eth_dev *dev,
+		   uint16_t rx_queue_id,
+		   uint16_t nb_rx_desc __rte_unused,
+		   unsigned int socket_id __rte_unused,
+		   const struct rte_eth_rxconf *rx_conf __rte_unused,
+		   struct rte_mempool *mp)
+{
+	struct pmd_internals *internals = dev->data->dev_private;
+	struct rx_queue *rxq;
+	uint16_t room;
+	int fd;
+
+	if ((mp == NULL) || (rx_queue_id >= internals->nb_queues)) {
+		RTE_LOG(ERR, PMD, "nb_queues %d mp %p\n",
+			internals->nb_queues, mp);
+		return -1;
+	}
+
+	rxq = &internals->rxq[rx_queue_id];
+	rxq->mp = mp;
+	rxq->in_port = dev->data->port_id;
+
+	/* Now get the space available for data in the mbuf */
+	room = (uint16_t)(rte_pktmbuf_data_room_size(mp) -
+			  RTE_PKTMBUF_HEADROOM);
+	if (room < ETH_FRAME_LEN) {
+		RTE_LOG(ERR, PMD,
+			"%s: %d bytes will not fit in mbuf (%d bytes)\n",
+			dev->data->name, ETH_FRAME_LEN, room);
+		return -ENOMEM;
+	}
+
+	fd = tap_setup_queue(dev, internals, rx_queue_id);
+	if (fd == -1)
+		return -1;
+
+	/* remember the descriptor so stop/remove can close it */
+	internals->fds[rx_queue_id] = fd;
+	RTE_LOG(INFO, PMD, "RX TAP device name %s, qid %d on fd %d\n",
+		dev->data->name, rx_queue_id, rxq->fd);
+
+	return 0;
+}
+
+/*
+ * tx_queue_setup callback.
+ *
+ * Binds the queue to a TAP descriptor through tap_setup_queue() and records
+ * the descriptor in internals->fds[], as the RX setup does, so stop/remove
+ * can close it.
+ *
+ * Returns 0 on success (not the descriptor value) and -1 for a bad queue id
+ * or a failed descriptor setup.
+ */
+static int
+tap_tx_queue_setup(struct rte_eth_dev *dev,
+		   uint16_t tx_queue_id,
+		   uint16_t nb_tx_desc __rte_unused,
+		   unsigned int socket_id __rte_unused,
+		   const struct rte_eth_txconf *tx_conf __rte_unused)
+{
+	struct pmd_internals *internals = dev->data->dev_private;
+	int fd;
+
+	if (tx_queue_id >= internals->nb_queues)
+		return -1;
+
+	fd = tap_setup_queue(dev, internals, tx_queue_id);
+	if (fd == -1)
+		return -1;
+
+	/* same value as any RX entry: tap_setup_queue() shares one fd per qid */
+	internals->fds[tx_queue_id] = fd;
+
+	RTE_LOG(INFO, PMD, "TX TAP device name %s, qid %d on fd %d\n",
+		dev->data->name, tx_queue_id, internals->txq[tx_queue_id].fd);
+
+	return 0;
+}
+
+static const struct eth_dev_ops ops = {	/* callback table installed as dev->dev_ops at create time */
+	.dev_start              = tap_dev_start,
+	.dev_stop               = tap_dev_stop,
+	.dev_close              = tap_dev_close,	/* empty function */
+	.dev_configure          = tap_dev_configure,	/* always returns 0 */
+	.dev_infos_get          = tap_dev_info,
+	.rx_queue_setup         = tap_rx_queue_setup,
+	.tx_queue_setup         = tap_tx_queue_setup,
+	.rx_queue_release       = tap_rx_queue_release,
+	.tx_queue_release       = tap_tx_queue_release,
+	.link_update            = tap_link_update,	/* always returns 0 */
+	.stats_get              = tap_stats_get,
+	.stats_reset            = tap_stats_reset,
+};
+
+/*
+ * Store a 4-character tag followed by the port id and the NUMA node of dev
+ * as the first six bytes of sa_data.
+ */
+static void
+tap_fill_mac(char *sa_data, const char *tag, const struct rte_eth_dev *dev)
+{
+	memcpy(sa_data, tag, 4);
+	sa_data[4] = dev->data->port_id;
+	sa_data[5] = dev->data->numa_node;
+}
+
+/*
+ * Set the hardware address on the TAP descriptor fd to "Tap-" + port id +
+ * NUMA node (SIOCSIFHWADDR), and return "dnet" + port id + NUMA node in
+ * addr as the application side address, which has to differ from it.
+ *
+ * Returns 0 on success and -1 on bad arguments (fd <= 0, NULL dev or addr)
+ * or when one of the ioctls fails.
+ */
+static int
+pmd_mac_address(int fd, struct rte_eth_dev *dev, struct ether_addr *addr)
+{
+	struct ifreq ifr;
+
+	if ((dev == NULL) || (addr == NULL) || (fd <= 0))
+		return -1;
+
+	memset(&ifr, 0, sizeof(ifr));
+
+	/* fetch the current address; the request is then reused for the set */
+	if (ioctl(fd, SIOCGIFHWADDR, &ifr) == -1) {
+		RTE_LOG(ERR, PMD, "ioctl failed (SIOCGIFHWADDR) (%s)\n",
+			ifr.ifr_name);
+		return -1;
+	}
+
+	/* Set the host based MAC address to this special MAC format */
+	tap_fill_mac(ifr.ifr_hwaddr.sa_data, "Tap-", dev);
+	if (ioctl(fd, SIOCSIFHWADDR, &ifr) == -1) {
+		RTE_LOG(ERR, PMD, "%s: ioctl failed (SIOCSIFHWADDR) (%s)\n",
+			dev->data->name, ifr.ifr_name);
+		return -1;
+	}
+
+	/*
+	 * Set the local application MAC address, needs to be different than
+	 * the host based MAC address.
+	 */
+	tap_fill_mac(ifr.ifr_hwaddr.sa_data, "dnet", dev);
+	memcpy(addr, ifr.ifr_hwaddr.sa_data, ETH_ALEN);
+
+	return 0;
+}
+
+/*
+ * Create the ethdev port backing one TAP interface.
+ *
+ * Allocates the private data and a device data block on the local NUMA
+ * socket, allocates an ethdev named tap_name, opens the first TAP
+ * descriptor with tun_alloc() (which may rewrite tap_name) and stores it in
+ * queue slot 0, then sets up the MAC addresses.
+ *
+ * Returns 0 on success and -EINVAL on any failure. Every failure goes
+ * through the same exit path, which closes the descriptor if it was
+ * opened, releases the port and frees both allocations.
+ */
+static int
+eth_dev_tap_create(char *tap_name)
+{
+	int numa_node = rte_socket_id();
+	struct rte_eth_dev *dev = NULL;
+	struct pmd_internals *pmd = NULL;
+	struct rte_eth_dev_data *data = NULL;
+	int i, fd = -1;
+
+	RTE_LOG(INFO, PMD,
+		"Create TUN/TAP Ethernet device with %d queues on numa %u\n",
+		 RTE_PMD_TAP_MAX_QUEUES, rte_socket_id());
+
+	data = rte_zmalloc_socket(tap_name, sizeof(*data), 0, numa_node);
+	if (data == NULL)
+		goto error_exit;
+
+	pmd = rte_zmalloc_socket(tap_name, sizeof(*pmd), 0, numa_node);
+	if (pmd == NULL)
+		goto error_exit;
+
+	dev = rte_eth_dev_allocate(tap_name);
+	if (dev == NULL)
+		goto error_exit;
+
+	snprintf(pmd->name, sizeof(pmd->name), "%s", tap_name);
+
+	pmd->nb_queues = RTE_PMD_TAP_MAX_QUEUES;
+
+	/* Setup some default values */
+	data->dev_private = pmd;
+	data->port_id = dev->data->port_id;
+	data->dev_flags = RTE_ETH_DEV_DETACHABLE;
+	data->kdrv = RTE_KDRV_NONE;
+	data->drv_name = drivername;
+	data->numa_node = numa_node;
+
+	data->dev_link = pmd_link;
+	data->mac_addrs = &pmd->eth_addr;
+	data->nb_rx_queues = pmd->nb_queues;
+	data->nb_tx_queues = pmd->nb_queues;
+
+	/* from here on the port uses our own data block */
+	dev->data = data;
+	dev->dev_ops = &ops;
+	dev->driver = NULL;
+	dev->rx_pkt_burst = pmd_rx_burst;
+	dev->tx_pkt_burst = pmd_tx_burst;
+	snprintf(dev->data->name, sizeof(dev->data->name), "%s", tap_name);
+
+	/* Create the first Tap device */
+	fd = tun_alloc(tap_name);
+	if (fd < 0) {
+		RTE_LOG(ERR, PMD, "tun_alloc(%s) failed\n", dev->data->name);
+		goto error_exit;
+	}
+
+	/* Presetup the fds to -1 as being not working */
+	for (i = 0; i < RTE_PMD_TAP_MAX_QUEUES; i++) {
+		pmd->fds[i] = -1;
+		pmd->rxq[i].fd = -1;
+		pmd->txq[i].fd = -1;
+	}
+
+	/* Take the TUN/TAP fd and place in the first location */
+	pmd->rxq[0].fd = fd;
+	pmd->txq[0].fd = fd;
+	pmd->fds[0] = fd;
+
+	if (pmd_mac_address(fd, dev, &pmd->eth_addr) < 0)
+		goto error_exit;
+
+	return 0;
+
+error_exit:
+	RTE_PMD_DEBUG_TRACE("Unable to initialize %s\n", tap_name);
+
+	/* fd is still -1 unless tun_alloc() succeeded */
+	if (fd >= 0)
+		close(fd);
+
+	/* release the port first so it is detached before its data is freed */
+	rte_eth_dev_release_port(dev);
+
+	rte_free(data);
+	rte_free(pmd);
+
+	return -EINVAL;
+}
+
+/*
+ * kvargs handler for the "iface" argument: write the interface name into
+ * the buffer passed as extra_args. Without a value the default name for
+ * the unit taken by the current probe (tap_unit - 1) is written instead.
+ * Always returns 0.
+ */
+static int
+set_interface_name(const char *key __rte_unused,
+		   const char *value,
+		   void *extra_args)
+{
+	char *name = extra_args;
+
+	if (value == NULL) {
+		snprintf(name, RTE_ETH_NAME_MAX_LEN-1, "%s%d",
+			 DEFAULT_TAP_NAME, (tap_unit-1));
+		return 0;
+	}
+
+	snprintf(name, RTE_ETH_NAME_MAX_LEN-1, "%s", value);
+
+	return 0;
+}
+
+/*
+ * kvargs handler for the "speed" argument: store atoi(value) in the int
+ * passed as extra_args, or ETH_SPEED_NUM_10G when there is no value.
+ * Always returns 0.
+ */
+static int
+set_interface_speed(const char *key __rte_unused,
+		    const char *value,
+		    void *extra_args)
+{
+	int *speed = extra_args;
+
+	if (value)
+		*speed = atoi(value);
+	else
+		*speed = ETH_SPEED_NUM_10G;
+
+	return 0;
+}
+
+/*
+ * Open a TAP interface device.
+ *
+ * Builds the default interface name from DEFAULT_TAP_NAME and the next unit
+ * number, lets the optional "speed" and "iface" arguments in params override
+ * the speed and the name, then creates the port with eth_dev_tap_create().
+ *
+ * Returns the result of eth_dev_tap_create(), or -1 when processing one of
+ * the arguments fails. On any failure the unit number is given back.
+ */
+static int
+rte_pmd_tap_probe(const char *name, const char *params)
+{
+	int ret;
+	struct rte_kvargs *kvlist = NULL;
+	int speed;
+	char tap_name[RTE_ETH_NAME_MAX_LEN];
+
+	speed = ETH_SPEED_NUM_10G;
+	snprintf(tap_name, sizeof(tap_name), "%s%d",
+		 DEFAULT_TAP_NAME, tap_unit++);
+
+	RTE_LOG(INFO, PMD, "Initializing pmd_tap for %s as %s\n",
+		name, tap_name);
+
+	if (params && (params[0] != '\0')) {
+		RTE_LOG(INFO, PMD, "paramaters (%s)\n", params);
+
+		kvlist = rte_kvargs_parse(params, valid_arguments);
+		if (kvlist) {
+			if (rte_kvargs_count(kvlist, ETH_TAP_SPEED_ARG) == 1) {
+				ret = rte_kvargs_process(kvlist,
+							 ETH_TAP_SPEED_ARG,
+							 &set_interface_speed,
+							 &speed);
+				if (ret == -1)
+					goto leave;
+			}
+
+			if (rte_kvargs_count(kvlist, ETH_TAP_IFACE_ARG) == 1) {
+				ret = rte_kvargs_process(kvlist,
+							 ETH_TAP_IFACE_ARG,
+							 &set_interface_name,
+							 tap_name);
+				if (ret == -1)
+					goto leave;
+			}
+		}
+	}
+	/* pmd_link is the template eth_dev_tap_create() copies into the port */
+	pmd_link.link_speed = speed;
+
+	ret = eth_dev_tap_create(tap_name);
+
+leave:
+	/*
+	 * The argument steps jump here with -1 but eth_dev_tap_create() fails
+	 * with -EINVAL, so any negative value has to count as a failure.
+	 */
+	if (ret < 0) {
+		RTE_LOG(INFO, PMD, "Failed to create pmd_tap for %s as %s\n",
+			name, tap_name);
+		tap_unit--;		/* Restore the unit number */
+	}
+	rte_kvargs_free(kvlist);
+
+	return ret;
+}
+
+/*
+ * Detach a TAP device.
+ *
+ * Looks the port up by name; when it exists, closes every descriptor
+ * recorded in fds[], frees the private data and the device data block and
+ * releases the port. Always returns 0, also when no port is found.
+ *
+ * NOTE(review): the port is allocated under tap_name in
+ * eth_dev_tap_create() while the lookup here uses the name passed to
+ * remove - confirm the two can actually match.
+ */
+static int
+rte_pmd_tap_remove(const char *name)
+{
+	struct rte_eth_dev *eth_dev;
+	struct pmd_internals *internals;
+	int q;
+
+	RTE_LOG(INFO, PMD, "Closing TUN/TAP Ethernet device on numa %u\n",
+		rte_socket_id());
+
+	/* find the ethdev entry */
+	eth_dev = rte_eth_dev_allocated(name);
+	if (eth_dev == NULL)
+		return 0;
+
+	internals = eth_dev->data->dev_private;
+	for (q = 0; q < internals->nb_queues; q++) {
+		int fd = internals->fds[q];
+
+		if (fd == -1)
+			continue;
+		close(fd);
+	}
+
+	/* internals is the port's dev_private allocation */
+	rte_free(internals);
+	rte_free(eth_dev->data);
+
+	rte_eth_dev_release_port(eth_dev);
+
+	return 0;
+}
+
+static struct rte_vdev_driver pmd_tap_drv = {	/* virtual-device driver hooks for this PMD */
+	.probe = rte_pmd_tap_probe,
+	.remove = rte_pmd_tap_remove,
+};
+
+DRIVER_REGISTER_VDEV(net_tap, pmd_tap_drv);	/* registers the driver under the name net_tap */
+DRIVER_REGISTER_PARAM_STRING(net_tap, "iface=<string>,speed=N");	/* arguments parsed by probe */
diff --git a/drivers/net/tap/rte_pmd_tap_version.map b/drivers/net/tap/rte_pmd_tap_version.map
new file mode 100644
index 0000000..61463bf
--- /dev/null
+++ b/drivers/net/tap/rte_pmd_tap_version.map
@@ -0,0 +1,4 @@
+DPDK_16.11 {
+
+	local: *;
+};
diff --git a/mk/rte.app.mk b/mk/rte.app.mk
index ac50a21..40d16f7 100644
--- a/mk/rte.app.mk
+++ b/mk/rte.app.mk
@@ -123,6 +123,7 @@ _LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_PCAP)       += -lrte_pmd_pcap -lpcap
 _LDLIBS-$(CONFIG_RTE_LIBRTE_QEDE_PMD)       += -lrte_pmd_qede -lz
 _LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_RING)       += -lrte_pmd_ring
 _LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_SZEDATA2)   += -lrte_pmd_szedata2 -lsze2
+_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_TAP)        += -lrte_pmd_tap
 _LDLIBS-$(CONFIG_RTE_LIBRTE_THUNDERX_NICVF_PMD) += -lrte_pmd_thunderx_nicvf -lm
 _LDLIBS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD)     += -lrte_pmd_virtio
 ifeq ($(CONFIG_RTE_LIBRTE_VHOST),y)
-- 
2.8.0.GIT

^ permalink raw reply related	[flat|nested] 59+ messages in thread

* Re: [PATCH v4] drivers/net:new PMD using tun/tap host interface
  2016-10-11 20:56         ` Wiles, Keith
@ 2016-10-12  8:14           ` Michał Mirosław
  0 siblings, 0 replies; 59+ messages in thread
From: Michał Mirosław @ 2016-10-12  8:14 UTC (permalink / raw)
  To: Wiles, Keith; +Cc: dev, pmatilai, yuanhan.liu

2016-10-11 22:56 GMT+02:00 Wiles, Keith <keith.wiles@intel.com>:
>> On Oct 11, 2016, at 6:30 AM, Michał Mirosław <mirqus@gmail.com> wrote:
>>
>> 2016-10-04 16:45 GMT+02:00, Keith Wiles <keith.wiles@intel.com>:
>>> The rte_eth_tap.c PMD creates a device using TUN/TAP interfaces
>>> on the local host. The PMD allows for DPDK and the host to
>>> communicate using a raw device interface on the host and in
>>> the DPDK application. The device created is a Tap device with
>>> a L2 packet header.
>> [...]
>>> +static uint16_t
>>> +pmd_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
>>> +{
>>> +    int len, n;
>>> +    struct rte_mbuf *mbuf;
>>> +    struct rx_queue *rxq = queue;
>>> +    struct pollfd pfd;
>>> +    uint16_t num_rx;
>>> +    unsigned long num_rx_bytes = 0;
>>> +
>>> +    pfd.events = POLLIN;
>>> +    pfd.fd = rxq->fd;
>>> +    for (num_rx = 0; num_rx < nb_pkts; ) {
>>> +            n = poll(&pfd, 1, 0);
>>> +
>>> +            if (n <= 0)
>>> +                    break;
>>> +
>>
>> Considering that syscalls are rather expensive, it would be cheaper to
>> allocate an mbuf here and free it when read() returns -1 instead of
>> calling poll() to check whether a packet is waiting. This way you
>> save a syscall per packet and replace one syscall with one mbuf free
>> per poll.
>
> I made this change, but saw no performance difference in the two methods. Removing poll seems reasonable as it is simpler. TAP is already so slow is why the performance did not change is my guess. Anyone wanting to use TAP as a high performance interface is going to be surprised. I believe the best use case for the TAP interface is for control or exception path.

Agreed, TAP does not look like designed for performance as a first goal.

You could do the same simplification for TX path, BTW.

Best Regards,
Michał Mirosław

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [PATCH v5] drivers/net:new PMD using tun/tap host interface
  2016-10-11 21:51       ` [PATCH v5] " Keith Wiles
@ 2016-10-12 14:56         ` Ferruh Yigit
  2016-10-12 18:19           ` Wiles, Keith
  0 siblings, 1 reply; 59+ messages in thread
From: Ferruh Yigit @ 2016-10-12 14:56 UTC (permalink / raw)
  To: Keith Wiles, dev; +Cc: pmatilai, yuanhan.liu

On 10/11/2016 10:51 PM, Keith Wiles wrote:
> The rte_eth_tap.c PMD creates a device using TUN/TAP interfaces
> on the local host. The PMD allows for DPDK and the host to
> communicate using a raw device interface on the host and in
> the DPDK application. The device created is a Tap device with
> a L2 packet header.
> 
> v5 - merge in changes from list review see related emails.
>      fixed checkpatch issues and many minor edits
> v4 - merge with latest driver changes
> v3 - fix includes by removing ifdef for other type besides Linux.
>      Fix the copyright notice in the Makefile
> v2 - merge all of the patches into one patch.
>      Fix a typo on naming the tap device.
>      Update the maintainers list
> 
> Signed-off-by: Keith Wiles <keith.wiles@intel.com>
> ---

<..>

> diff --git a/config/common_base b/config/common_base
> index f5d2eff..356c631 100644
> --- a/config/common_base
> +++ b/config/common_base
> @@ -592,3 +592,8 @@ CONFIG_RTE_APP_TEST_RESOURCE_TAR=n
>  CONFIG_RTE_TEST_PMD=y
>  CONFIG_RTE_TEST_PMD_RECORD_CORE_CYCLES=n
>  CONFIG_RTE_TEST_PMD_RECORD_BURST_STATS=n
> +
> +#
> +# Set TAP PMD to 'n' as it is only supported in Linux for now.

This comment ends up in the final .config and creates confusion; can we
remove it if you don't have a strong opinion about keeping it?

<..>

> diff --git a/doc/guides/nics/tap.rst b/doc/guides/nics/tap.rst
> new file mode 100644
> index 0000000..eed81ec
> --- /dev/null
> +++ b/doc/guides/nics/tap.rst

<..>

> +.. code-block:: console
> +
> +   The interfaced name can be changed by adding the iface=foo0
> +   e.g. --vdev=eth_tap,iface=foo0 --vdev=eth_tap,iface=foo1, ...

For all file:
%s/eth_tap/net_tap/g, there are multiple lines with this usage


<..>

> diff --git a/drivers/net/tap/rte_eth_tap.c b/drivers/net/tap/rte_eth_tap.c
> new file mode 100644
> index 0000000..c13aa1b
> --- /dev/null
> +++ b/drivers/net/tap/rte_eth_tap.c

<..>

> +
> +struct tap_info {
> +	char name[RTE_ETH_NAME_MAX_LEN]; /* Interface name supplied/given */
> +	int speed;			 /* Speed of interface */
> +};

This struct can go away; it is not used, since with the updated code
rte_pmd_tap_probe() uses tap_name and speed as separate variables
instead of the struct.


<..>

> +
> +	/* If the name is different that new name as default */
> +	if (name && strcmp(name, ifr.ifr_name))
> +		snprintf(name, RTE_ETH_NAME_MAX_LEN-1, "%s", ifr.ifr_name);

syntax, space around "-"

<..>

> +
> +static void
> +tap_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *tap_stats)
> +{
> +	unsigned i, imax;

checkpatch complain about not using "unsigned int"


<..>

> +static int
> +tap_setup_queue(struct rte_eth_dev *dev,
> +		struct pmd_internals *internals,
> +		uint16_t qid)
> +{
> +	struct rx_queue *rx = &internals->rxq[qid];
> +	struct tx_queue *tx = &internals->txq[qid];
> +	int fd;
> +
> +	if ((fd = rx->fd) < 0)
> +		if ((fd = tx->fd) < 0) {
> +			RTE_LOG(INFO, PMD, "Add queue to TAP %s for qid %d\n",
> +				dev->data->name, qid);
> +			if ((fd = tun_alloc(dev->data->name)) < 0) {

checkpatch complain about assignment in the if condition


<..>

> +	/* Now get the space available for data in the mbuf */
> +	buf_size = (uint16_t) (rte_pktmbuf_data_room_size(mp) -

syntax, no space after cast


<..>

> +	/* Create the first Tap device */
> +	if ((fd = tun_alloc(tap_name)) < 0) {

checkpatch complains about assignment in if condition

> +		RTE_LOG(ERR, PMD, "tun_alloc(%s) failed\n", dev->data->name);
> +		rte_free(pmd);

rte_free(data) or goto error_exit; ?

> +		rte_eth_dev_release_port(dev);
> +		return -EINVAL;
> +	}
> +
> +	/* Presetup the fds to -1 as being not working */
> +	for(i = 0; i < RTE_PMD_TAP_MAX_QUEUES; i++) {
> +		pmd->fds[i] = -1;
> +		pmd->rxq[i].fd = -1;
> +		pmd->txq[i].fd = -1;
> +	}
> +
> +	/* Take the TUN/TAP fd and place in the first location */
> +	pmd->rxq[0].fd = fd;
> +	pmd->txq[0].fd = fd;
> +	pmd->fds[0] = fd;
> +
> +	if (pmd_mac_address(fd, dev, &pmd->eth_addr) < 0) {
> +		rte_free(pmd);

rte_free(data) or goto error_exit; ?


<..>

> +static int
> +set_interface_name(const char *key __rte_unused,
> +		   const char *value,
> +		   void *extra_args)
> +{
> +	char *name = (char *)extra_args;
> +
> +	if (value)
> +		snprintf(name, RTE_ETH_NAME_MAX_LEN-1, "%s", value);

syntax, space around "-"

> +	else
> +		snprintf(name, RTE_ETH_NAME_MAX_LEN-1, "%s%d",

syntax, space around "-"

> +			 DEFAULT_TAP_NAME, (tap_unit-1));

syntax, space around "-"

> +
> +	return 0;
> +}
> +
> +static int
> +set_interface_speed(const char *key __rte_unused,
> +		    const char *value,
> +		    void *extra_args)
> +{
> +	*(int *)extra_args = (value)? atoi(value) : ETH_SPEED_NUM_10G;

syntax, space around "?"

<..>

> +		kvlist = rte_kvargs_parse(params, valid_arguments);
> +		if (kvlist) {
> +			if (rte_kvargs_count(kvlist, ETH_TAP_SPEED_ARG) == 1) {
> +				ret = rte_kvargs_process(kvlist,
> +							 ETH_TAP_SPEED_ARG,
> +							 &set_interface_speed,
> +						         &speed);

whitespace, space and tab mixed

> +				if (ret == -1)
> +					goto leave;
> +			}
> +
> +			if (rte_kvargs_count(kvlist, ETH_TAP_IFACE_ARG) == 1) {
> +				ret = rte_kvargs_process(kvlist,
> +							 ETH_TAP_IFACE_ARG,
> +							 &set_interface_name,
> +						         tap_name);

whitespace, space and tab mixed

<..>

> +static int
> +rte_pmd_tap_remove(const char *name)
> +{
> +	struct rte_eth_dev *eth_dev = NULL;
> +	struct pmd_internals *internals;
> +	int i;
> +
> +	RTE_LOG(INFO, PMD, "Closing TUN/TAP Ethernet device on numa %u\n",
> +		rte_socket_id());
> +
> +	/* find the ethdev entry */
> +	eth_dev = rte_eth_dev_allocated(name);

This may cause a problem. The device is created with tap_name, but the
lookup uses name. I suspect this will always return NULL.

<..>

Thanks,
ferruh

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [PATCH v5] drivers/net:new PMD using tun/tap host interface
  2016-10-12 14:56         ` Ferruh Yigit
@ 2016-10-12 18:19           ` Wiles, Keith
  2016-10-12 19:57             ` Wiles, Keith
  0 siblings, 1 reply; 59+ messages in thread
From: Wiles, Keith @ 2016-10-12 18:19 UTC (permalink / raw)
  To: Yigit, Ferruh; +Cc: <dev@dpdk.org>, Panu Matilainen, yuanhan.liu


Regards,
Keith

> On Oct 12, 2016, at 9:56 AM, Yigit, Ferruh <ferruh.yigit@intel.com> wrote:
> 
> On 10/11/2016 10:51 PM, Keith Wiles wrote:
>> The rte_eth_tap.c PMD creates a device using TUN/TAP interfaces
>> on the local host. The PMD allows for DPDK and the host to
>> communicate using a raw device interface on the host and in
>> the DPDK application. The device created is a Tap device with
>> a L2 packet header.
>> 
>> v5 - merge in changes from list review see related emails.
>>     fixed checkpatch issues and many minor edits
>> v4 - merge with latest driver changes
>> v3 - fix includes by removing ifdef for other type besides Linux.
>>     Fix the copyright notice in the Makefile
>> v2 - merge all of the patches into one patch.
>>     Fix a typo on naming the tap device.
>>     Update the maintainers list
>> 
>> Signed-off-by: Keith Wiles <keith.wiles@intel.com>
>> ---
> 
> <..>
> 
>> diff --git a/config/common_base b/config/common_base
>> index f5d2eff..356c631 100644
>> --- a/config/common_base
>> +++ b/config/common_base
>> @@ -592,3 +592,8 @@ CONFIG_RTE_APP_TEST_RESOURCE_TAR=n
>> CONFIG_RTE_TEST_PMD=y
>> CONFIG_RTE_TEST_PMD_RECORD_CORE_CYCLES=n
>> CONFIG_RTE_TEST_PMD_RECORD_BURST_STATS=n
>> +
>> +#
>> +# Set TAP PMD to 'n' as it is only supported in Linux for now.
> 
> This comment is carried over into the final .config and creates confusion;
> can we remove it if you don't have a strong opinion about keeping it?

What do you mean, the statement is confusing or causes problems?

> 
> <..>
> 
>> diff --git a/doc/guides/nics/tap.rst b/doc/guides/nics/tap.rst
>> new file mode 100644
>> index 0000000..eed81ec
>> --- /dev/null
>> +++ b/doc/guides/nics/tap.rst
> 
> <..>
> 
>> +.. code-block:: console
>> +
>> +   The interfaced name can be changed by adding the iface=foo0
>> +   e.g. --vdev=eth_tap,iface=foo0 --vdev=eth_tap,iface=foo1, ...
> 
> For all file:
> %s/eth_tap/net_tap/g, there are multiple lines with this usage

Missed that one.

> 
> 
> <..>
> 
>> diff --git a/drivers/net/tap/rte_eth_tap.c b/drivers/net/tap/rte_eth_tap.c
>> new file mode 100644
>> index 0000000..c13aa1b
>> --- /dev/null
>> +++ b/drivers/net/tap/rte_eth_tap.c
> 
> <..>
> 
>> +
>> +struct tap_info {
>> +	char name[RTE_ETH_NAME_MAX_LEN]; /* Interface name supplied/given */
>> +	int speed;			 /* Speed of interface */
>> +};
> 
> This struct can go away, it is not used, since with the updated code
> rte_pmd_tap_probe() uses tap_name and speed as separate variables
> instead of a struct.
> 

OK.

> 
> <..>
> 
>> +
>> +	/* If the name is different that new name as default */
>> +	if (name && strcmp(name, ifr.ifr_name))
>> +		snprintf(name, RTE_ETH_NAME_MAX_LEN-1, "%s", ifr.ifr_name);
> 
> syntax, space around "-"
> 
> <..>
> 
>> +
>> +static void
>> +tap_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *tap_stats)
>> +{
>> +	unsigned i, imax;
> 
> checkpatch complains about not using "unsigned int"

I ran checkpatch on the patch and saw no errors reported via the scripts/checkpatch.sh. Which checkpatch are you using here?
> 
> 
> <..>
> 
>> +static int
>> +tap_setup_queue(struct rte_eth_dev *dev,
>> +		struct pmd_internals *internals,
>> +		uint16_t qid)
>> +{
>> +	struct rx_queue *rx = &internals->rxq[qid];
>> +	struct tx_queue *tx = &internals->txq[qid];
>> +	int fd;
>> +
>> +	if ((fd = rx->fd) < 0)
>> +		if ((fd = tx->fd) < 0) {
>> +			RTE_LOG(INFO, PMD, "Add queue to TAP %s for qid %d\n",
>> +				dev->data->name, qid);
>> +			if ((fd = tun_alloc(dev->data->name)) < 0) {
> 
> checkpatch complain about assignment in the if condition
> 
> 
> <..>
> 
>> +	/* Now get the space available for data in the mbuf */
>> +	buf_size = (uint16_t) (rte_pktmbuf_data_room_size(mp) -
> 
> syntax, no space after cast
> 
> 
> <..>
> 
>> +	/* Create the first Tap device */
>> +	if ((fd = tun_alloc(tap_name)) < 0) {
> 
> checkpatch complains about assignment in if condition
> 
>> +		RTE_LOG(ERR, PMD, "tun_alloc(%s) failed\n", dev->data->name);
>> +		rte_free(pmd);
> 
> rte_free(data) or goto error_exit; ?
> 
>> +		rte_eth_dev_release_port(dev);
>> +		return -EINVAL;
>> +	}
>> +
>> +	/* Presetup the fds to -1 as being not working */
>> +	for(i = 0; i < RTE_PMD_TAP_MAX_QUEUES; i++) {
>> +		pmd->fds[i] = -1;
>> +		pmd->rxq[i].fd = -1;
>> +		pmd->txq[i].fd = -1;
>> +	}
>> +
>> +	/* Take the TUN/TAP fd and place in the first location */
>> +	pmd->rxq[0].fd = fd;
>> +	pmd->txq[0].fd = fd;
>> +	pmd->fds[0] = fd;
>> +
>> +	if (pmd_mac_address(fd, dev, &pmd->eth_addr) < 0) {
>> +		rte_free(pmd);
> 
> rte_free(data) or goto error_exit; ?
> 
> 
> <..>
> 
>> +static int
>> +set_interface_name(const char *key __rte_unused,
>> +		   const char *value,
>> +		   void *extra_args)
>> +{
>> +	char *name = (char *)extra_args;
>> +
>> +	if (value)
>> +		snprintf(name, RTE_ETH_NAME_MAX_LEN-1, "%s", value);
> 
> syntax, space around "-"
> 
>> +	else
>> +		snprintf(name, RTE_ETH_NAME_MAX_LEN-1, "%s%d",
> 
> syntax, space around "-"
> 
>> +			 DEFAULT_TAP_NAME, (tap_unit-1));
> 
> syntax, space around "-"
> 
>> +
>> +	return 0;
>> +}
>> +
>> +static int
>> +set_interface_speed(const char *key __rte_unused,
>> +		    const char *value,
>> +		    void *extra_args)
>> +{
>> +	*(int *)extra_args = (value)? atoi(value) : ETH_SPEED_NUM_10G;
> 
> syntax, space around "?"
> 
> <..>
> 
>> +		kvlist = rte_kvargs_parse(params, valid_arguments);
>> +		if (kvlist) {
>> +			if (rte_kvargs_count(kvlist, ETH_TAP_SPEED_ARG) == 1) {
>> +				ret = rte_kvargs_process(kvlist,
>> +							 ETH_TAP_SPEED_ARG,
>> +							 &set_interface_speed,
>> +						         &speed);
> 
> whitespace, space and tab mixed
> 
>> +				if (ret == -1)
>> +					goto leave;
>> +			}
>> +
>> +			if (rte_kvargs_count(kvlist, ETH_TAP_IFACE_ARG) == 1) {
>> +				ret = rte_kvargs_process(kvlist,
>> +							 ETH_TAP_IFACE_ARG,
>> +							 &set_interface_name,
>> +						         tap_name);
> 
> whitespace, space and tab mixed
> 
> <..>
> 
>> +static int
>> +rte_pmd_tap_remove(const char *name)
>> +{
>> +	struct rte_eth_dev *eth_dev = NULL;
>> +	struct pmd_internals *internals;
>> +	int i;
>> +
>> +	RTE_LOG(INFO, PMD, "Closing TUN/TAP Ethernet device on numa %u\n",
>> +		rte_socket_id());
>> +
>> +	/* find the ethdev entry */
>> +	eth_dev = rte_eth_dev_allocated(name);
> 
> This may cause a problem. The device is created with tap_name, but the lookup
> here uses name. I suspect this will always return NULL.
> 
> <..>
> 
> Thanks,
> ferru


^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [PATCH v5] drivers/net:new PMD using tun/tap host interface
  2016-10-12 18:19           ` Wiles, Keith
@ 2016-10-12 19:57             ` Wiles, Keith
  0 siblings, 0 replies; 59+ messages in thread
From: Wiles, Keith @ 2016-10-12 19:57 UTC (permalink / raw)
  To: Yigit, Ferruh; +Cc: <dev@dpdk.org>, Panu Matilainen, yuanhan.liu


Regards,
Keith

> On Oct 12, 2016, at 1:19 PM, Wiles, Keith <keith.wiles@intel.com> wrote:
> 
> 
> Regards,
> Keith
> 
>> On Oct 12, 2016, at 9:56 AM, Yigit, Ferruh <ferruh.yigit@intel.com> wrote:
>> 
>>> +
>>> +static void
>>> +tap_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *tap_stats)
>>> +{
>>> +	unsigned i, imax;
>> 
>> checkpatch complains about not using "unsigned int"
> 
> I ran checkpatch on the patch and saw no errors reported via the scripts/checkpatch.sh. Which checkpatch are you using here?

OK, the scripts/checkpatch.sh does not seem to report any failures, but running checkpatch.pl does report problems. Is the scripts/checkpatch.sh script supposed to display the errors or what?

It seems odd for the script file not to display warnings and errors, unless it is just for validating a patch. I would expect the script to show the problems normally or at least with an option.



^ permalink raw reply	[flat|nested] 59+ messages in thread

* [PATCH v6] drivers/net:new PMD using tun/tap host interface
  2016-09-21  2:00   ` [PATCH v3] drivers/net:new PMD using tun/tap host interface Keith Wiles
  2016-10-04 14:45     ` [PATCH v4] " Keith Wiles
@ 2016-10-12 20:54     ` Keith Wiles
  2016-10-13 14:41       ` Ferruh Yigit
  2016-10-13 15:36     ` [PATCH v7] " Keith Wiles
                       ` (2 subsequent siblings)
  4 siblings, 1 reply; 59+ messages in thread
From: Keith Wiles @ 2016-10-12 20:54 UTC (permalink / raw)
  To: dev; +Cc: pmatilai, yuanhan.liu, ferruh.yigit

The rte_eth_tap.c PMD creates a device using TUN/TAP interfaces
on the local host. The PMD allows for DPDK and the host to
communicate using a raw device interface on the host and in
the DPDK application. The device created is a Tap device with
a L2 packet header.

v6 - fixed the checkpatch issues
v5 - merge in changes from list review see related emails
     fixed many minor edits
v4 - merge with latest driver changes
v3 - fix includes by removing ifdef for other type besides Linux
     Fix the copyright notice in the Makefile
v2 - merge all of the patches into one patch
     Fix a typo on naming the tap device
     Update the maintainers list

Signed-off-by: Keith Wiles <keith.wiles@intel.com>
---
 MAINTAINERS                             |   5 +
 config/common_base                      |   5 +
 config/common_linuxapp                  |   1 +
 doc/guides/nics/tap.rst                 | 138 ++++++
 drivers/net/Makefile                    |   1 +
 drivers/net/tap/Makefile                |  57 +++
 drivers/net/tap/rte_eth_tap.c           | 759 ++++++++++++++++++++++++++++++++
 drivers/net/tap/rte_pmd_tap_version.map |   4 +
 mk/rte.app.mk                           |   1 +
 9 files changed, 971 insertions(+)
 create mode 100644 doc/guides/nics/tap.rst
 create mode 100644 drivers/net/tap/Makefile
 create mode 100644 drivers/net/tap/rte_eth_tap.c
 create mode 100644 drivers/net/tap/rte_pmd_tap_version.map

diff --git a/MAINTAINERS b/MAINTAINERS
index cd8d167..f905709 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -394,6 +394,11 @@ F: doc/guides/nics/pcap_ring.rst
 F: app/test/test_pmd_ring.c
 F: app/test/test_pmd_ring_perf.c
 
+Tap PMD
+M: Keith Wiles <keith.wiles@intel.com>
+F: drivers/net/tap
+F: doc/guides/nics/tap.rst
+
 Null Networking PMD
 M: Tetsuya Mukawa <mtetsuyah@gmail.com>
 F: drivers/net/null/
diff --git a/config/common_base b/config/common_base
index f5d2eff..356c631 100644
--- a/config/common_base
+++ b/config/common_base
@@ -592,3 +592,8 @@ CONFIG_RTE_APP_TEST_RESOURCE_TAR=n
 CONFIG_RTE_TEST_PMD=y
 CONFIG_RTE_TEST_PMD_RECORD_CORE_CYCLES=n
 CONFIG_RTE_TEST_PMD_RECORD_BURST_STATS=n
+
+#
+# Set TAP PMD to 'n' as it is only supported in Linux for now.
+#
+CONFIG_RTE_LIBRTE_PMD_TAP=n
diff --git a/config/common_linuxapp b/config/common_linuxapp
index 2483dfa..782b503 100644
--- a/config/common_linuxapp
+++ b/config/common_linuxapp
@@ -44,3 +44,4 @@ CONFIG_RTE_LIBRTE_PMD_VHOST=y
 CONFIG_RTE_LIBRTE_PMD_AF_PACKET=y
 CONFIG_RTE_LIBRTE_POWER=y
 CONFIG_RTE_VIRTIO_USER=y
+CONFIG_RTE_LIBRTE_PMD_TAP=y
diff --git a/doc/guides/nics/tap.rst b/doc/guides/nics/tap.rst
new file mode 100644
index 0000000..eed81ec
--- /dev/null
+++ b/doc/guides/nics/tap.rst
@@ -0,0 +1,138 @@
+..  BSD LICENSE
+    Copyright(c) 2016 Intel Corporation. All rights reserved.
+    All rights reserved.
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions
+    are met:
+
+    * Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+    notice, this list of conditions and the following disclaimer in
+    the documentation and/or other materials provided with the
+    distribution.
+    * Neither the name of Intel Corporation nor the names of its
+    contributors may be used to endorse or promote products derived
+    from this software without specific prior written permission.
+
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+Tun/Tap Poll Mode Driver
+========================================
+
+The rte_eth_tap.c PMD creates a device using TUN/TAP interfaces on the local
+host. The PMD allows for DPDK and the host to communicate using a raw device
+interface on the host and in the DPDK application.
+
+The device created is a TAP device, which sends/receives packets in a raw format
+with an L2 header. The TAP PMD is used for connectivity to the local host via a
+TAP interface. When the TAP PMD is initialized it will create a number of tap
+devices in the host, visible via the 'ifconfig -a' or 'ip' commands. These
+commands can be used to configure and query the virtual device.
+
+These TAP interfaces can be used with wireshark, tcpdump or Pktgen-DPDK, as well
+as serving as a network connection to the DPDK application. The way to enable
+one or more interfaces is to use the --vdev=net_tap option on the DPDK
+application command line. Each --vdev=net_tap option given will create an
+interface named dtap0, dtap1, ... and so forth.
+
+.. code-block:: console
+
+   The interface name can be changed by adding the iface=foo0 option,
+   e.g. --vdev=net_tap,iface=foo0 --vdev=net_tap,iface=foo1, ...
+
+.. code-block:: console
+
+   Also the speed of the interface can be changed from 10G to whatever number
+   needed, but the interface does not enforce that speed.
+   e.g. --vdev=net_tap,iface=foo0,speed=25000
+
+After the DPDK application is started you can send and receive packets on the
+interface using the standard rx_burst/tx_burst APIs in DPDK. From the host point
+of view you can use any host tool like tcpdump, wireshark, ping, Pktgen and
+others to communicate with the DPDK application. The DPDK application may not
+understand network protocols like IPv4/6, UDP or TCP unless the application has
+been written to understand these protocols.
+
+If you need the interface to act as a real network interface, meaning it is up
+and has a valid IP address, then you can do this with the following commands:
+
+.. code-block:: console
+
+   sudo ip link set dtap0 up; sudo ip addr add 192.168.0.250/24 dev dtap0
+   sudo ip link set dtap1 up; sudo ip addr add 192.168.1.250/24 dev dtap1
+
+Please change the IP addresses as you see fit.
+
+If routing is enabled on the host you can also communicate with the DPDK App
+over the internet via a standard socket layer application as long as you account
+for the protocol handling in the application.
+
+If you have a Network Stack in your DPDK application or something like it you
+can utilize that stack to handle the network protocols. Plus you would be able
+to address the interface using an IP address assigned to the internal interface.
+
+As a very crude test you can do the following:
+
+Apply the patch below and make sure you have socat installed on your system.
+
+Build DPDK, then pull down Pktgen and build pktgen using the DPDK SDK/Target
+used to build the dpdk you pulled down.
+
+Run pktgen from the pktgen repo directory in an xterm:
+    Note: change the -b options to blacklist all of your physical ports. The
+          following command line is all one line.
+
+.. code-block:: console
+
+    sudo ./app/app/x86_64-native-linuxapp-gcc/app/pktgen -l 1-5 -n 4        \
+     --proc-type auto --log-level 8 --socket-mem 512,512 --file-prefix pg   \
+     --vdev=net_tap --vdev=net_tap -b 05:00.0 -b 05:00.1                    \
+     -b 04:00.0 -b 04:00.1 -b 04:00.2 -b 04:00.3                            \
+     -b 81:00.0 -b 81:00.1 -b 81:00.2 -b 81:00.3                            \
+      -b 82:00.0 -b 83:00.0 -- -T -P -m [2:3].0 -m [4:5].1                  \
+     -f themes/black-yellow.theme
+
+I normally put the line above into a file called doit.sh, just to allow for a
+simple execution of the line above.
+
+You can leave the -f themes/black-yellow.theme off if the colors do not work
+for your system configuration.
+
+Verify with the 'ifconfig -a' command in a different xterm window; the dtap0
+and dtap1 interfaces should have been created.
+
+Next set the links for the two interfaces to up via the commands below.
+
+.. code-block:: console
+
+    sudo ip link set dtap0 up; sudo ip addr add 192.168.0.250/24 dev dtap0
+    sudo ip link set dtap1 up; sudo ip addr add 192.168.1.250/24 dev dtap1
+
+Then use socat to create a loopback for the two interfaces.
+
+.. code-block:: console
+
+    sudo socat interface:dtap0 interface:dtap1
+
+Then on the Pktgen command line interface you can start sending packets using
+the commands 'start 0' and 'start 1' or you can start both at the same time
+with 'start all'. The command 'str' is an alias for 'start all' and 'stp' is
+an alias for 'stop all'.
+
+While running you should see the 64 byte counters increasing to verify the
+traffic is being looped back. You can use 'set all size XXX' to change the
+size of the packets after you stop the traffic. Use the pktgen 'help' command
+to see a list of all commands. You can also use the '-f' option to load commands
+at startup.
diff --git a/drivers/net/Makefile b/drivers/net/Makefile
index bc93230..e366a85 100644
--- a/drivers/net/Makefile
+++ b/drivers/net/Makefile
@@ -51,6 +51,7 @@ DIRS-$(CONFIG_RTE_LIBRTE_PMD_PCAP) += pcap
 DIRS-$(CONFIG_RTE_LIBRTE_QEDE_PMD) += qede
 DIRS-$(CONFIG_RTE_LIBRTE_PMD_RING) += ring
 DIRS-$(CONFIG_RTE_LIBRTE_PMD_SZEDATA2) += szedata2
+DIRS-$(CONFIG_RTE_LIBRTE_PMD_TAP) += tap
 DIRS-$(CONFIG_RTE_LIBRTE_THUNDERX_NICVF_PMD) += thunderx
 DIRS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio
 DIRS-$(CONFIG_RTE_LIBRTE_VMXNET3_PMD) += vmxnet3
diff --git a/drivers/net/tap/Makefile b/drivers/net/tap/Makefile
new file mode 100644
index 0000000..e18f30c
--- /dev/null
+++ b/drivers/net/tap/Makefile
@@ -0,0 +1,57 @@
+#   BSD LICENSE
+#
+#   Copyright(c) 2016 Intel Corporation. All rights reserved.
+#
+#   Redistribution and use in source and binary forms, with or without
+#   modification, are permitted provided that the following conditions
+#   are met:
+#
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice, this list of conditions and the following disclaimer in
+#       the documentation and/or other materials provided with the
+#       distribution.
+#     * Neither the name of Intel Corporation nor the names of its
+#       contributors may be used to endorse or promote products derived
+#       from this software without specific prior written permission.
+#
+#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+#   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+#   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+#   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+#   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+#
+# library name
+#
+LIB = librte_pmd_tap.a
+
+EXPORT_MAP := rte_pmd_tap_version.map
+
+LIBABIVER := 1
+
+CFLAGS += -O3
+CFLAGS += $(WERROR_FLAGS)
+
+#
+# all source are stored in SRCS-y
+#
+SRCS-$(CONFIG_RTE_LIBRTE_PMD_TAP) += rte_eth_tap.c
+
+# this lib depends upon:
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_TAP) += lib/librte_eal
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_TAP) += lib/librte_mbuf
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_TAP) += lib/librte_mempool
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_TAP) += lib/librte_ether
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_TAP) += lib/librte_kvargs
+
+include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/drivers/net/tap/rte_eth_tap.c b/drivers/net/tap/rte_eth_tap.c
new file mode 100644
index 0000000..7588e1e
--- /dev/null
+++ b/drivers/net/tap/rte_eth_tap.c
@@ -0,0 +1,759 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2016 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <rte_mbuf.h>
+#include <rte_ethdev.h>
+#include <rte_malloc.h>
+#include <rte_vdev.h>
+#include <rte_kvargs.h>
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/socket.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <unistd.h>
+#include <poll.h>
+#include <arpa/inet.h>
+#include <linux/if.h>
+#include <linux/if_tun.h>
+#include <linux/if_ether.h>
+#include <fcntl.h>
+
+/* Linux based path to the TUN device */
+#define TUN_TAP_DEV_PATH        "/dev/net/tun"
+#define DEFAULT_TAP_NAME        "dtap"
+
+#define ETH_TAP_IFACE_ARG       "iface"
+#define ETH_TAP_SPEED_ARG       "speed"
+
+#define RTE_PMD_TAP_MAX_QUEUES	32
+
+static const char *valid_arguments[] = {
+	ETH_TAP_IFACE_ARG,
+	ETH_TAP_SPEED_ARG,
+	NULL
+};
+
+static const char *drivername = "Tap PMD";
+static int tap_unit;
+
+static struct rte_eth_link pmd_link = {
+	.link_speed = ETH_SPEED_NUM_10G,
+	.link_duplex = ETH_LINK_FULL_DUPLEX,
+	.link_status = ETH_LINK_DOWN,
+	.link_autoneg = ETH_LINK_SPEED_AUTONEG
+};
+
+struct tap_info {
+	char name[RTE_ETH_NAME_MAX_LEN]; /* Interface name supplied/given */
+	int speed;			 /* Speed of interface */
+};
+
+struct pkt_stats {
+	uint64_t opackets;		/* Number of output packets */
+	uint64_t ipackets;		/* Number of input packets */
+	uint64_t obytes;		/* Number of bytes on output */
+	uint64_t ibytes;		/* Number of bytes on input */
+	uint64_t errs;			/* Number of error packets */
+};
+
+struct rx_queue {
+	struct rte_mempool *mp;		/* Mempool for RX packets */
+	uint16_t in_port;		/* Port ID */
+	int fd;
+
+	struct pkt_stats stats;		/* Stats for this RX queue */
+};
+
+struct tx_queue {
+	int fd;
+	struct pkt_stats stats;		/* Stats for this TX queue */
+};
+
+struct pmd_internals {
+	char name[RTE_ETH_NAME_MAX_LEN];	/* Internal Tap device name */
+	uint16_t nb_queues;		/* Number of queues supported */
+	struct ether_addr eth_addr;	/* Mac address of the device port */
+
+	int if_index;			/* IF_INDEX for the port */
+	int fds[RTE_PMD_TAP_MAX_QUEUES]; /* List of all file descriptors */
+
+	struct rx_queue rxq[RTE_PMD_TAP_MAX_QUEUES];	/* List of RX queues */
+	struct tx_queue txq[RTE_PMD_TAP_MAX_QUEUES];	/* List of TX queues */
+};
+
+/* Tun/Tap allocation routine
+ *
+ * name is the name of the interface to use; if it is NULL or empty, the
+ * host-supplied name is taken.
+ */
+static int
+tun_alloc(char *name)
+{
+	struct ifreq ifr;
+	unsigned int features;
+	int fd;
+
+	memset(&ifr, 0, sizeof(struct ifreq));
+
+	ifr.ifr_flags = IFF_TAP | IFF_NO_PI;
+	if (name && name[0])
+		strncpy(ifr.ifr_name, name, IFNAMSIZ);
+
+	fd = open(TUN_TAP_DEV_PATH, O_RDWR);
+	if (fd < 0) {
+		RTE_LOG(ERR, PMD, "Unable to create TAP interface");
+		goto error;
+	}
+
+	/* Grab the TUN features to verify we can work */
+	if (ioctl(fd, TUNGETFEATURES, &features) < 0) {
+		RTE_LOG(ERR, PMD, "Unable to get TUN/TAP features\n");
+		goto error;
+	}
+	RTE_LOG(DEBUG, PMD, "TUN/TAP Features %08x\n", features);
+
+	if (!(features & IFF_MULTI_QUEUE) && (RTE_PMD_TAP_MAX_QUEUES > 1)) {
+		RTE_LOG(DEBUG, PMD, "TUN/TAP device only one queue\n");
+		goto error;
+	} else if ((features & IFF_ONE_QUEUE) &&
+			(RTE_PMD_TAP_MAX_QUEUES == 1)) {
+		ifr.ifr_flags |= IFF_ONE_QUEUE;
+		RTE_LOG(DEBUG, PMD, "Single queue only support\n");
+	} else {
+		ifr.ifr_flags |= IFF_MULTI_QUEUE;
+		RTE_LOG(DEBUG, PMD, "Multi-queue support for %d queues\n",
+			RTE_PMD_TAP_MAX_QUEUES);
+	}
+
+	/* Set the TUN/TAP configuration and get the name if needed */
+	if (ioctl(fd, TUNSETIFF, (void *)&ifr) < 0) {
+		RTE_LOG(ERR, PMD, "Unable to set TUNSETIFF for %s\n",
+			ifr.ifr_name);
+		perror("TUNSETIFF");
+		goto error;
+	}
+
+	/* Always set the file descriptor to non-blocking */
+	if (fcntl(fd, F_SETFL, O_NONBLOCK) < 0) {
+		RTE_LOG(ERR, PMD, "Unable to set to nonblocking\n");
+		perror("F_SETFL, NONBLOCK");
+		goto error;
+	}
+
+	/* If the name is different that new name as default */
+	if (name && strcmp(name, ifr.ifr_name))
+		snprintf(name, RTE_ETH_NAME_MAX_LEN - 1, "%s", ifr.ifr_name);
+
+	return fd;
+
+error:
+	if (fd > 0)
+		close(fd);
+	return -1;
+}
+
+/* Callback to handle the rx burst of packets to the correct interface and
+ * file descriptor(s) in a multi-queue setup.
+ */
+static uint16_t
+pmd_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
+{
+	int len;
+	struct rte_mbuf *mbuf;
+	struct rx_queue *rxq = queue;
+	uint16_t num_rx;
+	unsigned long num_rx_bytes = 0;
+
+	for (num_rx = 0; num_rx < nb_pkts; ) {
+		/* allocate the next mbuf */
+		mbuf = rte_pktmbuf_alloc(rxq->mp);
+		if (unlikely(!mbuf)) {
+			RTE_LOG(WARNING, PMD, "Unable to allocate mbuf\n");
+			break;
+		}
+
+		len = read(rxq->fd, rte_pktmbuf_mtod(mbuf, char *),
+			   rte_pktmbuf_tailroom(mbuf));
+		if (len <= 0) {
+			rte_pktmbuf_free(mbuf);
+			break;
+		}
+
+		mbuf->data_len = len;
+		mbuf->pkt_len = len;
+		mbuf->port = rxq->in_port;
+
+		/* account for the receive frame */
+		bufs[num_rx++] = mbuf;
+		num_rx_bytes += mbuf->pkt_len;
+	}
+	rxq->stats.ipackets += num_rx;
+	rxq->stats.ibytes += num_rx_bytes;
+
+	return num_rx;
+}
+
+/* Callback to handle sending packets from the tap interface
+ */
+static uint16_t
+pmd_tx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
+{
+	struct rte_mbuf *mbuf;
+	struct tx_queue *txq = queue;
+	struct pollfd pfd;
+	uint16_t num_tx = 0;
+	unsigned long num_tx_bytes = 0;
+	int i, n;
+
+	if (unlikely(nb_pkts == 0))
+		return 0;
+
+	pfd.events = POLLOUT;
+	pfd.fd = txq->fd;
+	for (i = 0; i < nb_pkts; i++) {
+		n = poll(&pfd, 1, 0);
+
+		if (n <= 0)
+			break;
+
+		if (pfd.revents & POLLOUT) {
+			/* copy the tx frame data */
+			mbuf = bufs[num_tx];
+			n = write(pfd.fd, rte_pktmbuf_mtod(mbuf, void*),
+				  rte_pktmbuf_pkt_len(mbuf));
+			if (n <= 0)
+				break;
+
+			num_tx++;
+			num_tx_bytes += mbuf->pkt_len;
+			rte_pktmbuf_free(mbuf);
+		}
+	}
+
+	txq->stats.opackets += num_tx;
+	txq->stats.errs += nb_pkts - num_tx;
+	txq->stats.obytes += num_tx_bytes;
+
+	return num_tx;
+}
+
+static int
+tap_dev_start(struct rte_eth_dev *dev)
+{
+	/* Force the Link up */
+	dev->data->dev_link.link_status = ETH_LINK_UP;
+
+	return 0;
+}
+
+/* This function gets called when the current port gets stopped.
+ */
+static void
+tap_dev_stop(struct rte_eth_dev *dev)
+{
+	int i;
+	struct pmd_internals *internals = dev->data->dev_private;
+
+	for (i = 0; i < internals->nb_queues; i++)
+		if (internals->fds[i] != -1)
+			close(internals->fds[i]);
+
+	dev->data->dev_link.link_status = ETH_LINK_DOWN;
+}
+
+static int
+tap_dev_configure(struct rte_eth_dev *dev __rte_unused)
+{
+	return 0;
+}
+
+static void
+tap_dev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
+{
+	struct pmd_internals *internals = dev->data->dev_private;
+
+	dev_info->driver_name = drivername;
+	dev_info->if_index = internals->if_index;
+	dev_info->max_mac_addrs = 1;
+	dev_info->max_rx_pktlen = (uint32_t)ETHER_MAX_VLAN_FRAME_LEN;
+	dev_info->max_rx_queues = internals->nb_queues;
+	dev_info->max_tx_queues = internals->nb_queues;
+	dev_info->min_rx_bufsize = 0;
+	dev_info->pci_dev = NULL;
+}
+
+static void
+tap_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *tap_stats)
+{
+	unsigned int i, imax;
+	unsigned long rx_total = 0, tx_total = 0, tx_err_total = 0;
+	unsigned long rx_bytes_total = 0, tx_bytes_total = 0;
+	const struct pmd_internals *pmd = dev->data->dev_private;
+
+	imax = (pmd->nb_queues < RTE_ETHDEV_QUEUE_STAT_CNTRS) ?
+		pmd->nb_queues : RTE_ETHDEV_QUEUE_STAT_CNTRS;
+
+	for (i = 0; i < imax; i++) {
+		tap_stats->q_ipackets[i] = pmd->rxq[i].stats.ipackets;
+		tap_stats->q_ibytes[i] = pmd->rxq[i].stats.ibytes;
+		rx_total += tap_stats->q_ipackets[i];
+		rx_bytes_total += tap_stats->q_ibytes[i];
+	}
+
+	for (i = 0; i < imax; i++) {
+		tap_stats->q_opackets[i] = pmd->txq[i].stats.opackets;
+		tap_stats->q_errors[i] = pmd->txq[i].stats.errs;
+		tap_stats->q_obytes[i] = pmd->txq[i].stats.obytes;
+		tx_total += tap_stats->q_opackets[i];
+		tx_err_total += tap_stats->q_errors[i];
+		tx_bytes_total += tap_stats->q_obytes[i];
+	}
+
+	tap_stats->ipackets = rx_total;
+	tap_stats->ibytes = rx_bytes_total;
+	tap_stats->opackets = tx_total;
+	tap_stats->oerrors = tx_err_total;
+	tap_stats->obytes = tx_bytes_total;
+}
+
+static void
+tap_stats_reset(struct rte_eth_dev *dev)
+{
+	int i;
+	struct pmd_internals *pmd = dev->data->dev_private;
+
+	for (i = 0; i < pmd->nb_queues; i++) {
+		pmd->rxq[i].stats.ipackets = 0;
+		pmd->rxq[i].stats.ibytes = 0;
+	}
+
+	for (i = 0; i < pmd->nb_queues; i++) {
+		pmd->txq[i].stats.opackets = 0;
+		pmd->txq[i].stats.errs = 0;
+		pmd->txq[i].stats.obytes = 0;
+	}
+}
+
+static void
+tap_dev_close(struct rte_eth_dev *dev __rte_unused)
+{
+}
+
+static void
+tap_rx_queue_release(void *queue)
+{
+	struct rx_queue *rxq = queue;
+
+	if (rxq && (rxq->fd > 0)) {
+		close(rxq->fd);
+		rxq->fd = -1;
+	}
+}
+
+static void
+tap_tx_queue_release(void *queue)
+{
+	struct tx_queue *txq = queue;
+
+	if (txq && (txq->fd > 0)) {
+		close(txq->fd);
+		txq->fd = -1;
+	}
+}
+
+static int
+tap_link_update(struct rte_eth_dev *dev __rte_unused,
+		int wait_to_complete __rte_unused)
+{
+	return 0;
+}
+
+/* Attach a TAP descriptor to the RX/TX queue pair at index qid.
+ *
+ * An already valid descriptor on the RX queue is preferred, then one on the
+ * TX queue; only when neither exists is a new one requested from tun_alloc().
+ * Both queues end up sharing the same descriptor and are published in the
+ * ethdev queue arrays. Returns the descriptor, or -1 if tun_alloc() fails.
+ */
+static int
+tap_setup_queue(struct rte_eth_dev *dev,
+		struct pmd_internals *internals,
+		uint16_t qid)
+{
+	struct rx_queue *rxq = &internals->rxq[qid];
+	struct tx_queue *txq = &internals->txq[qid];
+	int fd = (rxq->fd >= 0) ? rxq->fd : txq->fd;
+
+	if (fd < 0) {
+		RTE_LOG(INFO, PMD, "Add queue to TAP %s for qid %d\n",
+			dev->data->name, qid);
+		fd = tun_alloc(dev->data->name);
+		if (fd < 0) {
+			RTE_LOG(ERR, PMD, "tun_alloc(%s) failed\n",
+				dev->data->name);
+			return -1;
+		}
+	}
+
+	rxq->fd = fd;
+	txq->fd = fd;
+
+	dev->data->rx_queues[qid] = rxq;
+	dev->data->tx_queues[qid] = txq;
+
+	return fd;
+}
+
+/* RX queue setup callback.
+ *
+ * Rejects an out-of-range queue id or a missing mempool, records the mempool
+ * and port id on the queue, checks that an mbuf from mp can hold
+ * ETH_FRAME_LEN bytes after the headroom, and attaches a TAP descriptor via
+ * tap_setup_queue(). Returns 0 on success, -ENOMEM when the mbuf data room is
+ * too small, and -1 on any other failure.
+ */
+static int
+tap_rx_queue_setup(struct rte_eth_dev *dev,
+		   uint16_t rx_queue_id,
+		   uint16_t nb_rx_desc __rte_unused,
+		   unsigned int socket_id __rte_unused,
+		   const struct rte_eth_rxconf *rx_conf __rte_unused,
+		   struct rte_mempool *mp)
+{
+	struct pmd_internals *pmd = dev->data->dev_private;
+	struct rx_queue *rxq;
+	uint16_t room;
+	int fd;
+
+	if (!mp || (rx_queue_id >= pmd->nb_queues)) {
+		RTE_LOG(ERR, PMD, "nb_queues %d mp %p\n",
+			pmd->nb_queues, mp);
+		return -1;
+	}
+
+	rxq = &pmd->rxq[rx_queue_id];
+	rxq->mp = mp;
+	rxq->in_port = dev->data->port_id;
+
+	/* Data space left in an mbuf once the headroom is taken out */
+	room = (uint16_t)(rte_pktmbuf_data_room_size(mp) -
+				RTE_PKTMBUF_HEADROOM);
+	if (room < ETH_FRAME_LEN) {
+		RTE_LOG(ERR, PMD,
+			"%s: %d bytes will not fit in mbuf (%d bytes)\n",
+			dev->data->name, ETH_FRAME_LEN, room);
+		return -ENOMEM;
+	}
+
+	fd = tap_setup_queue(dev, pmd, rx_queue_id);
+	if (fd == -1)
+		return -1;
+
+	pmd->fds[rx_queue_id] = fd;
+	RTE_LOG(INFO, PMD, "RX TAP device name %s, qid %d on fd %d\n",
+		dev->data->name, rx_queue_id, rxq->fd);
+
+	return 0;
+}
+
+/* TX queue setup callback.
+ *
+ * Rejects an out-of-range queue id, then attaches a TAP descriptor to the
+ * queue through tap_setup_queue(). Returns 0 on success, as
+ * tap_rx_queue_setup() does, and -1 on failure.
+ */
+static int
+tap_tx_queue_setup(struct rte_eth_dev *dev,
+		   uint16_t tx_queue_id,
+		   uint16_t nb_tx_desc __rte_unused,
+		   unsigned int socket_id __rte_unused,
+		   const struct rte_eth_txconf *tx_conf __rte_unused)
+{
+	struct pmd_internals *internals = dev->data->dev_private;
+	int fd;
+
+	if (tx_queue_id >= internals->nb_queues)
+		return -1;
+
+	/* Fail before logging so a bogus "on fd -1" line is never printed. */
+	fd = tap_setup_queue(dev, internals, tx_queue_id);
+	if (fd < 0)
+		return -1;
+
+	RTE_LOG(INFO, PMD, "TX TAP device name %s, qid %d on fd %d\n",
+		dev->data->name, tx_queue_id, internals->txq[tx_queue_id].fd);
+
+	/* Report success as 0; the descriptor number is not a status code. */
+	return 0;
+}
+
+static const struct eth_dev_ops ops = {	/* callbacks set as dev->dev_ops */
+	.dev_start              = tap_dev_start,
+	.dev_stop               = tap_dev_stop,
+	.dev_close              = tap_dev_close,
+	.dev_configure          = tap_dev_configure,
+	.dev_infos_get          = tap_dev_info,
+	.rx_queue_setup         = tap_rx_queue_setup,
+	.tx_queue_setup         = tap_tx_queue_setup,
+	.rx_queue_release       = tap_rx_queue_release,
+	.tx_queue_release       = tap_tx_queue_release,
+	.link_update            = tap_link_update,
+	.stats_get              = tap_stats_get,
+	.stats_reset            = tap_stats_reset,
+};
+
+/* Program the MAC address of the host side of the TAP interface and derive
+ * the MAC address used by the application side.
+ *
+ * The host address is written through SIOCSIFHWADDR as the bytes 'T','a',
+ * 'p','-' followed by the port id and the NUMA node. The application address
+ * stored in addr uses 'd','n','e','t' for the first four bytes so that it
+ * differs from the host one. Returns 0 on success and -1 on a bad argument
+ * or a failed ioctl.
+ */
+static int
+pmd_mac_address(int fd, struct rte_eth_dev *dev, struct ether_addr *addr)
+{
+	static const char host_prefix[4] = { 'T', 'a', 'p', '-' };
+	static const char app_prefix[4] = { 'd', 'n', 'e', 't' };
+	struct ifreq ifr;
+	char *mac;
+
+	if (!dev || !addr || (fd <= 0))
+		return -1;
+
+	memset(&ifr, 0, sizeof(ifr));
+
+	if (ioctl(fd, SIOCGIFHWADDR, &ifr) == -1) {
+		RTE_LOG(ERR, PMD, "ioctl failed (SIOCGIFHWADDR) (%s)\n",
+			ifr.ifr_name);
+		return -1;
+	}
+
+	mac = ifr.ifr_hwaddr.sa_data;
+
+	/* Set the host based MAC address to this special MAC format */
+	rte_memcpy(mac, host_prefix, sizeof(host_prefix));
+	mac[4] = dev->data->port_id;
+	mac[5] = dev->data->numa_node;
+	if (ioctl(fd, SIOCSIFHWADDR, &ifr) == -1) {
+		RTE_LOG(ERR, PMD, "%s: ioctl failed (SIOCSIFHWADDR) (%s)\n",
+			dev->data->name, ifr.ifr_name);
+		return -1;
+	}
+
+	/* Set the local application MAC address, which needs to be different
+	 * than the host based MAC address.
+	 */
+	rte_memcpy(mac, app_prefix, sizeof(app_prefix));
+	mac[4] = dev->data->port_id;
+	mac[5] = dev->data->numa_node;
+	rte_memcpy(addr, mac, ETH_ALEN);
+
+	return 0;
+}
+
+/* Allocate and initialise one TAP ethdev.
+ *
+ * name is used to allocate the ethdev; tap_name is used as the allocation
+ * tag, the private name, the ethdev data name and the argument of the first
+ * tun_alloc() call. On success the descriptor from tun_alloc() is stored in
+ * slot 0 of the RX queue, TX queue and fds arrays and 0 is returned. On any
+ * failure everything obtained so far is released and -EINVAL is returned.
+ */
+static int
+eth_dev_tap_create(const char *name, char *tap_name)
+{
+	int numa_node = rte_socket_id();
+	struct rte_eth_dev *dev = NULL;
+	struct pmd_internals *pmd = NULL;
+	struct rte_eth_dev_data *data = NULL;
+	int i, fd = -1;
+
+	RTE_LOG(INFO, PMD,
+		"%s: Create TAP Ethernet device with %d queues on numa %u\n",
+		 name, RTE_PMD_TAP_MAX_QUEUES, rte_socket_id());
+
+	data = rte_zmalloc_socket(tap_name, sizeof(*data), 0, numa_node);
+	if (!data)
+		goto error_exit;
+
+	pmd = rte_zmalloc_socket(tap_name, sizeof(*pmd), 0, numa_node);
+	if (!pmd)
+		goto error_exit;
+
+	/* Use the name and not the tap_name */
+	dev = rte_eth_dev_allocate(name);
+	if (!dev)
+		goto error_exit;
+
+	snprintf(pmd->name, sizeof(pmd->name), "%s", tap_name);
+
+	pmd->nb_queues = RTE_PMD_TAP_MAX_QUEUES;
+
+	/* Setup some default values */
+	data->dev_private = pmd;
+	data->port_id = dev->data->port_id;
+	data->dev_flags = RTE_ETH_DEV_DETACHABLE;
+	data->kdrv = RTE_KDRV_NONE;
+	data->drv_name = drivername;
+	data->numa_node = numa_node;
+
+	data->dev_link = pmd_link;
+	data->mac_addrs = &pmd->eth_addr;
+	data->nb_rx_queues = pmd->nb_queues;
+	data->nb_tx_queues = pmd->nb_queues;
+
+	dev->data = data;
+	dev->dev_ops = &ops;
+	dev->driver = NULL;
+	dev->rx_pkt_burst = pmd_rx_burst;
+	dev->tx_pkt_burst = pmd_tx_burst;
+	/* NOTE(review): data->name is filled from tap_name, while
+	 * rte_pmd_tap_remove() looks the port up by the vdev name; confirm
+	 * that the lookup still finds this device.
+	 */
+	snprintf(dev->data->name, sizeof(dev->data->name), "%s", tap_name);
+
+	/* Create the first Tap device */
+	fd = tun_alloc(tap_name);
+	if (fd < 0)
+		goto error_exit;
+
+	/* Presetup the fds to -1 as being not working */
+	for (i = 0; i < RTE_PMD_TAP_MAX_QUEUES; i++) {
+		pmd->fds[i] = -1;
+		pmd->rxq[i].fd = -1;
+		pmd->txq[i].fd = -1;
+	}
+
+	/* Take the TUN/TAP fd and place in the first location */
+	pmd->rxq[0].fd = fd;
+	pmd->txq[0].fd = fd;
+	pmd->fds[0] = fd;
+
+	if (pmd_mac_address(fd, dev, &pmd->eth_addr) < 0)
+		goto error_exit;
+
+	return 0;
+
+error_exit:
+	RTE_PMD_DEBUG_TRACE("Unable to initialize %s\n", name);
+
+	/* Do not leak the TAP descriptor when a later step fails. */
+	if (fd >= 0)
+		close(fd);
+
+	rte_free(data);
+	rte_free(pmd);
+
+	/* dev is still NULL when the failure happened before allocation. */
+	if (dev)
+		rte_eth_dev_release_port(dev);
+
+	return -EINVAL;
+}
+
+/* kvargs handler for the interface-name argument.
+ *
+ * Writes value into the buffer passed as extra_args; when no value is given
+ * the name is built from DEFAULT_TAP_NAME and (tap_unit - 1). Always
+ * returns 0.
+ */
+static int
+set_interface_name(const char *key __rte_unused,
+		   const char *value,
+		   void *extra_args)
+{
+	char *dst = (char *)extra_args;
+
+	if (!value) {
+		snprintf(dst, RTE_ETH_NAME_MAX_LEN - 1, "%s%d",
+			 DEFAULT_TAP_NAME, (tap_unit - 1));
+		return 0;
+	}
+
+	snprintf(dst, RTE_ETH_NAME_MAX_LEN - 1, "%s", value);
+
+	return 0;
+}
+
+/* kvargs handler for the speed argument.
+ *
+ * Stores atoi(value) in the int pointed to by extra_args, or
+ * ETH_SPEED_NUM_10G when no value is given. Always returns 0.
+ */
+static int
+set_interface_speed(const char *key __rte_unused,
+		    const char *value,
+		    void *extra_args)
+{
+	int *speed = (int *)extra_args;
+
+	if (value)
+		*speed = atoi(value);
+	else
+		*speed = ETH_SPEED_NUM_10G;
+
+	return 0;
+}
+
+/* Open a TAP interface device.
+ *
+ * Builds a default interface name from DEFAULT_TAP_NAME and the running
+ * tap_unit counter, applies the optional speed and iface arguments found in
+ * params, then creates the ethdev. Returns the result of the last step that
+ * ran; any negative value means failure, in which case tap_unit is rolled
+ * back.
+ */
+static int
+rte_pmd_tap_probe(const char *name, const char *params)
+{
+	int ret;
+	struct rte_kvargs *kvlist = NULL;
+	int speed;
+	char tap_name[RTE_ETH_NAME_MAX_LEN];
+
+	speed = ETH_SPEED_NUM_10G;
+	snprintf(tap_name, sizeof(tap_name), "%s%d",
+		 DEFAULT_TAP_NAME, tap_unit++);
+
+	RTE_LOG(INFO, PMD, "Initializing pmd_tap for %s as %s\n",
+		name, tap_name);
+
+	if (params && (params[0] != '\0')) {
+		RTE_LOG(INFO, PMD, "paramaters (%s)\n", params);
+
+		kvlist = rte_kvargs_parse(params, valid_arguments);
+		if (kvlist) {
+			if (rte_kvargs_count(kvlist, ETH_TAP_SPEED_ARG) == 1) {
+				ret = rte_kvargs_process(kvlist,
+							 ETH_TAP_SPEED_ARG,
+							 &set_interface_speed,
+							 &speed);
+				if (ret < 0)
+					goto leave;
+			}
+
+			if (rte_kvargs_count(kvlist, ETH_TAP_IFACE_ARG) == 1) {
+				ret = rte_kvargs_process(kvlist,
+							 ETH_TAP_IFACE_ARG,
+							 &set_interface_name,
+							 tap_name);
+				if (ret < 0)
+					goto leave;
+			}
+		}
+	}
+	pmd_link.link_speed = speed;
+
+	ret = eth_dev_tap_create(name, tap_name);
+
+leave:
+	/* eth_dev_tap_create() fails with -EINVAL, not -1, so test the sign
+	 * rather than one specific value.
+	 */
+	if (ret < 0) {
+		RTE_LOG(INFO, PMD, "Failed to create pmd_tap for %s as %s\n",
+			name, tap_name);
+		tap_unit--;		/* Restore the unit number */
+	}
+	rte_kvargs_free(kvlist);
+
+	return ret;
+}
+
+/* Detach a TAP device: close the descriptors recorded in its fds array and
+ * free its private data and ethdev data. Always returns 0.
+ */
+static int
+rte_pmd_tap_remove(const char *name)
+{
+	struct rte_eth_dev *port;
+	struct pmd_internals *pmd;
+	int q;
+
+	RTE_LOG(INFO, PMD, "Closing TUN/TAP Ethernet device on numa %u\n",
+		rte_socket_id());
+
+	/* Nothing to tear down when no ethdev entry matches the name. */
+	port = rte_eth_dev_allocated(name);
+	if (port == NULL)
+		return 0;
+
+	pmd = port->data->dev_private;
+	for (q = 0; q < pmd->nb_queues; q++) {
+		if (pmd->fds[q] == -1)
+			continue;
+		close(pmd->fds[q]);
+	}
+
+	rte_free(pmd);
+	rte_free(port->data);
+
+	rte_eth_dev_release_port(port);
+
+	return 0;
+}
+
+static struct rte_vdev_driver pmd_tap_drv = {	/* vdev probe/remove hooks */
+	.probe = rte_pmd_tap_probe,
+	.remove = rte_pmd_tap_remove,
+};
+
+DRIVER_REGISTER_VDEV(net_tap, pmd_tap_drv);
+DRIVER_REGISTER_PARAM_STRING(net_tap, "iface=<string>,speed=N");
diff --git a/drivers/net/tap/rte_pmd_tap_version.map b/drivers/net/tap/rte_pmd_tap_version.map
new file mode 100644
index 0000000..61463bf
--- /dev/null
+++ b/drivers/net/tap/rte_pmd_tap_version.map
@@ -0,0 +1,4 @@
+DPDK_16.11 {
+
+	local: *;
+};
diff --git a/mk/rte.app.mk b/mk/rte.app.mk
index ac50a21..40d16f7 100644
--- a/mk/rte.app.mk
+++ b/mk/rte.app.mk
@@ -123,6 +123,7 @@ _LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_PCAP)       += -lrte_pmd_pcap -lpcap
 _LDLIBS-$(CONFIG_RTE_LIBRTE_QEDE_PMD)       += -lrte_pmd_qede -lz
 _LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_RING)       += -lrte_pmd_ring
 _LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_SZEDATA2)   += -lrte_pmd_szedata2 -lsze2
+_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_TAP)        += -lrte_pmd_tap
 _LDLIBS-$(CONFIG_RTE_LIBRTE_THUNDERX_NICVF_PMD) += -lrte_pmd_thunderx_nicvf -lm
 _LDLIBS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD)     += -lrte_pmd_virtio
 ifeq ($(CONFIG_RTE_LIBRTE_VHOST),y)
-- 
2.8.0.GIT

^ permalink raw reply related	[flat|nested] 59+ messages in thread

* Re: [PATCH v6] drivers/net:new PMD using tun/tap host interface
  2016-10-12 20:54     ` [PATCH v6] " Keith Wiles
@ 2016-10-13 14:41       ` Ferruh Yigit
  0 siblings, 0 replies; 59+ messages in thread
From: Ferruh Yigit @ 2016-10-13 14:41 UTC (permalink / raw)
  To: Keith Wiles, dev; +Cc: pmatilai, yuanhan.liu

On 10/12/2016 9:54 PM, Keith Wiles wrote:
> The rte_eth_tap.c PMD creates a device using TUN/TAP interfaces
> on the local host. The PMD allows for DPDK and the host to
> communicate using a raw device interface on the host and in
> the DPDK application. The device created is a Tap device with
> a L2 packet header.
> 
> v6 - fixed the checkpatch issues
> v5 - merge in changes from list review see related emails
>      fixed many minor edits
> v4 - merge with latest driver changes
> v3 - fix includes by removing ifdef for other type besides Linux
>      Fix the copyright notice in the Makefile
> v2 - merge all of the patches into one patch
>      Fix a typo on naming the tap device
>      Update the maintainers list
> 
> Signed-off-by: Keith Wiles <keith.wiles@intel.com>
> ---
<...>

> diff --git a/config/common_base b/config/common_base
...
> +
> +#
> +# Set TAP PMD to 'n' as it is only supported in Linux for now.
> +#

In final .config file, this comment says TAP PMD set to "n", but it is
set to "y"
Also this behavior is not unique to this config option, many config
items used as described in the comment, and they don't have same comment.

<...>

> diff --git a/doc/guides/nics/tap.rst b/doc/guides/nics/tap.rst
...
> +
> +   Also the speed of the interface can be changed from 10G to whatever number
> +   needed, but the interface does not enforce that speed.
> +   e.g. --vdev=eth_tap,iface=foo0,speed=25000

Same comment with previous review, eth_tap should be net_tap, for all
occurrences in this document

<...>

> diff --git a/drivers/net/tap/rte_eth_tap.c b/drivers/net/tap/rte_eth_tap.c
...
> +struct tap_info {
> +	char name[RTE_ETH_NAME_MAX_LEN]; /* Interface name supplied/given */
> +	int speed;			 /* Speed of interface */
> +};

Same comment with previous review, this struct is not used at all.

<...>

> +static int
> +eth_dev_tap_create(const char *name, char *tap_name)
...
> +	dev->data = data;
> +	dev->dev_ops = &ops;
> +	dev->driver = NULL;
> +	dev->rx_pkt_burst = pmd_rx_burst;
> +	dev->tx_pkt_burst = pmd_tx_burst;
> +	snprintf(dev->data->name, sizeof(dev->data->name), "%s", tap_name);

[1]

<...>

> + */
> +static int
> +rte_pmd_tap_remove(const char *name)
...
> +
> +	/* find the ethdev entry */
> +	eth_dev = rte_eth_dev_allocated(name);

This still won't work, please see [1], dev->name overwritten by
tap_name, and this function does simply a strcmp with name and dev->name.

<...>

Thanks,
ferruh

^ permalink raw reply	[flat|nested] 59+ messages in thread

* [PATCH v7] drivers/net:new PMD using tun/tap host interface
  2016-09-21  2:00   ` [PATCH v3] drivers/net:new PMD using tun/tap host interface Keith Wiles
  2016-10-04 14:45     ` [PATCH v4] " Keith Wiles
  2016-10-12 20:54     ` [PATCH v6] " Keith Wiles
@ 2016-10-13 15:36     ` Keith Wiles
  2016-10-13 16:11     ` [PATCH v8] " Keith Wiles
  2016-10-13 22:03     ` [PATCH v9] " Keith Wiles
  4 siblings, 0 replies; 59+ messages in thread
From: Keith Wiles @ 2016-10-13 15:36 UTC (permalink / raw)
  To: dev; +Cc: pmatilai, yuanhan.liu, ferruh.yigit

The rte_eth_tap.c PMD creates a device using TUN/TAP interfaces
on the local host. The PMD allows for DPDK and the host to
communicate using a raw device interface on the host and in
the DPDK application. The device created is a Tap device with
a L2 packet header.

v7 - Reword the comment in common_base and fix the data->name issue
v6 - fixed the checkpatch issues
v5 - merge in changes from list review see related emails
     fixed many minor edits
v4 - merge with latest driver changes
v3 - fix includes by removing ifdef for other type besides Linux
     Fix the copyright notice in the Makefile
v2 - merge all of the patches into one patch
     Fix a typo on naming the tap device
     Update the maintainers list

Signed-off-by: Keith Wiles <keith.wiles@intel.com>
---
 MAINTAINERS                             |   5 +
 config/common_base                      |   9 +
 config/common_linuxapp                  |   1 +
 doc/guides/nics/tap.rst                 | 138 ++++++
 drivers/net/Makefile                    |   1 +
 drivers/net/tap/Makefile                |  57 +++
 drivers/net/tap/rte_eth_tap.c           | 754 ++++++++++++++++++++++++++++++++
 drivers/net/tap/rte_pmd_tap_version.map |   4 +
 mk/rte.app.mk                           |   1 +
 9 files changed, 970 insertions(+)
 create mode 100644 doc/guides/nics/tap.rst
 create mode 100644 drivers/net/tap/Makefile
 create mode 100644 drivers/net/tap/rte_eth_tap.c
 create mode 100644 drivers/net/tap/rte_pmd_tap_version.map

diff --git a/MAINTAINERS b/MAINTAINERS
index cd8d167..f905709 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -394,6 +394,11 @@ F: doc/guides/nics/pcap_ring.rst
 F: app/test/test_pmd_ring.c
 F: app/test/test_pmd_ring_perf.c
 
+Tap PMD
+M: Keith Wiles <keith.wiles@intel.com>
+F: drivers/net/tap
+F: doc/guides/nics/tap.rst
+
 Null Networking PMD
 M: Tetsuya Mukawa <mtetsuyah@gmail.com>
 F: drivers/net/null/
diff --git a/config/common_base b/config/common_base
index f5d2eff..47ef843 100644
--- a/config/common_base
+++ b/config/common_base
@@ -592,3 +592,12 @@ CONFIG_RTE_APP_TEST_RESOURCE_TAR=n
 CONFIG_RTE_TEST_PMD=y
 CONFIG_RTE_TEST_PMD_RECORD_CORE_CYCLES=n
 CONFIG_RTE_TEST_PMD_RECORD_BURST_STATS=n
+
+#
+# Compile the TAP PMD
+#
+# The TAP PMD is currently only built for Linux and the
+# option is enabled by default in common_linuxapp file,
+# set to 'n' in the common_base file.
+#
+CONFIG_RTE_LIBRTE_PMD_TAP=n
diff --git a/config/common_linuxapp b/config/common_linuxapp
index 2483dfa..782b503 100644
--- a/config/common_linuxapp
+++ b/config/common_linuxapp
@@ -44,3 +44,4 @@ CONFIG_RTE_LIBRTE_PMD_VHOST=y
 CONFIG_RTE_LIBRTE_PMD_AF_PACKET=y
 CONFIG_RTE_LIBRTE_POWER=y
 CONFIG_RTE_VIRTIO_USER=y
+CONFIG_RTE_LIBRTE_PMD_TAP=y
diff --git a/doc/guides/nics/tap.rst b/doc/guides/nics/tap.rst
new file mode 100644
index 0000000..bffc649
--- /dev/null
+++ b/doc/guides/nics/tap.rst
@@ -0,0 +1,138 @@
+..  BSD LICENSE
+    Copyright(c) 2016 Intel Corporation. All rights reserved.
+    All rights reserved.
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions
+    are met:
+
+    * Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+    notice, this list of conditions and the following disclaimer in
+    the documentation and/or other materials provided with the
+    distribution.
+    * Neither the name of Intel Corporation nor the names of its
+    contributors may be used to endorse or promote products derived
+    from this software without specific prior written permission.
+
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+Tun/Tap Poll Mode Driver
+========================================
+
+The rte_eth_tap.c PMD creates a device using TUN/TAP interfaces on the local
+host. The PMD allows for DPDK and the host to communicate using a raw device
+interface on the host and in the DPDK application.
+
+The device created is a TAP device, which sends/receives packets in a raw format
+with an L2 header. The TAP PMD is used for connectivity to the local host
+using a TAP interface. When the TAP PMD is initialized it will create a number
+of tap devices in the host, accessible via the 'ifconfig -a' or 'ip' commands.
+These commands can be used to configure and query the virtual device.
+
+These TAP interfaces can be used with wireshark, tcpdump or Pktgen-DPDK, and
+can also be used as a network connection to the DPDK application. The
+method to enable one or more interfaces is to use the --vdev=net_tap option on
+the DPDK application command line. Each --vdev=net_tap option given will create
+an interface named dtap0, dtap1, ... and so forth.
+
+.. code-block:: console
+
+   The interface name can be changed by adding the iface=foo0 argument
+   e.g. --vdev=net_tap,iface=foo0 --vdev=net_tap,iface=foo1, ...
+
+.. code-block:: console
+
+   Also the speed of the interface can be changed from 10G to whatever number
+   needed, but the interface does not enforce that speed.
+   e.g. --vdev=net_tap,iface=foo0,speed=25000
+
+After the DPDK application is started you can send and receive packets on the
+interface using the standard rx_burst/tx_burst APIs in DPDK. From the host point
+of view you can use any host tool like tcpdump, wireshark, ping, Pktgen and
+others to communicate with the DPDK application. The DPDK application may not
+understand network protocols like IPv4/6, UDP or TCP unless the application has
+been written to understand these protocols.
+
+If you need the interface as a real network interface meaning running and has
+a valid IP address then you can do this with the following commands:
+
+.. code-block:: console
+
+   sudo ip link set dtap0 up; sudo ip addr add 192.168.0.250/24 dev dtap0
+   sudo ip link set dtap1 up; sudo ip addr add 192.168.1.250/24 dev dtap1
+
+Please change the IP addresses as you see fit.
+
+If routing is enabled on the host you can also communicate with the DPDK App
+over the internet via a standard socket layer application as long as you account
+for the protocol handling in the application.
+
+If you have a Network Stack in your DPDK application or something like it you
+can utilize that stack to handle the network protocols. Plus you would be able
+to address the interface using an IP address assigned to the internal interface.
+
+For a very crude test you can do the following:
+
+Apply the patch below and make sure you have socat installed on your system.
+
+Build DPDK, then pull down Pktgen and build pktgen using the DPDK SDK/Target
+used to build the dpdk you pulled down.
+
+Run pktgen from the pktgen repo directory in an xterm:
+    Note: change the -b options to blacklist all of your physical ports. The
+          following command line is all one line.
+
+.. code-block:: console
+
+    sudo ./app/app/x86_64-native-linuxapp-gcc/app/pktgen -l 1-5 -n 4        \
+     --proc-type auto --log-level 8 --socket-mem 512,512 --file-prefix pg   \
+     --vdev=net_tap --vdev=net_tap -b 05:00.0 -b 05:00.1                    \
+     -b 04:00.0 -b 04:00.1 -b 04:00.2 -b 04:00.3                            \
+     -b 81:00.0 -b 81:00.1 -b 81:00.2 -b 81:00.3                            \
+      -b 82:00.0 -b 83:00.0 -- -T -P -m [2:3].0 -m [4:5].1                  \
+     -f themes/black-yellow.theme
+
+I normally put the line above into a file called doit.sh, just to allow for a
+simple execution of the line above.
+
+You can leave the -f themes/black-yellow.theme off if the colors do not work
+for your system configuration.
+
+Verify with the 'ifconfig -a' command in a different xterm window; you should
+see the dtap0 and dtap1 interfaces created.
+
+Next set the links for the two interfaces to up via the commands below.
+
+.. code-block:: console
+
+    sudo ip link set dtap0 up; sudo ip addr add 192.168.0.250/24 dev dtap0
+    sudo ip link set dtap1 up; sudo ip addr add 192.168.1.250/24 dev dtap1
+
+Then use socat to create a loopback for the two interfaces.
+
+.. code-block:: console
+
+    sudo socat interface:dtap0 interface:dtap1
+
+Then on the Pktgen command line interface you can start sending packets using
+the commands 'start 0' and 'start 1' or you can start both at the same time
+with 'start all'. The command 'str' is an alias for 'start all' and 'stp' is
+an alias for 'stop all'.
+
+While running you should see the 64 byte counters increasing to verify the
+traffic is being looped back. You can use 'set all size XXX' to change the
+size of the packets after you stop the traffic. Use the pktgen 'help' command
+to see a list of all commands. You can also use the '-f' option to load commands
+at startup.
diff --git a/drivers/net/Makefile b/drivers/net/Makefile
index bc93230..e366a85 100644
--- a/drivers/net/Makefile
+++ b/drivers/net/Makefile
@@ -51,6 +51,7 @@ DIRS-$(CONFIG_RTE_LIBRTE_PMD_PCAP) += pcap
 DIRS-$(CONFIG_RTE_LIBRTE_QEDE_PMD) += qede
 DIRS-$(CONFIG_RTE_LIBRTE_PMD_RING) += ring
 DIRS-$(CONFIG_RTE_LIBRTE_PMD_SZEDATA2) += szedata2
+DIRS-$(CONFIG_RTE_LIBRTE_PMD_TAP) += tap
 DIRS-$(CONFIG_RTE_LIBRTE_THUNDERX_NICVF_PMD) += thunderx
 DIRS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio
 DIRS-$(CONFIG_RTE_LIBRTE_VMXNET3_PMD) += vmxnet3
diff --git a/drivers/net/tap/Makefile b/drivers/net/tap/Makefile
new file mode 100644
index 0000000..e18f30c
--- /dev/null
+++ b/drivers/net/tap/Makefile
@@ -0,0 +1,57 @@
+#   BSD LICENSE
+#
+#   Copyright(c) 2016 Intel Corporation. All rights reserved.
+#
+#   Redistribution and use in source and binary forms, with or without
+#   modification, are permitted provided that the following conditions
+#   are met:
+#
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice, this list of conditions and the following disclaimer in
+#       the documentation and/or other materials provided with the
+#       distribution.
+#     * Neither the name of Intel Corporation nor the names of its
+#       contributors may be used to endorse or promote products derived
+#       from this software without specific prior written permission.
+#
+#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+#   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+#   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+#   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+#   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+#
+# library name
+#
+LIB = librte_pmd_tap.a
+
+EXPORT_MAP := rte_pmd_tap_version.map
+
+LIBABIVER := 1
+
+CFLAGS += -O3
+CFLAGS += $(WERROR_FLAGS)
+
+#
+# all sources are stored in SRCS-y
+#
+SRCS-$(CONFIG_RTE_LIBRTE_PMD_TAP) += rte_eth_tap.c
+
+# this lib depends upon:
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_TAP) += lib/librte_eal
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_TAP) += lib/librte_mbuf
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_TAP) += lib/librte_mempool
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_TAP) += lib/librte_ether
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_TAP) += lib/librte_kvargs
+
+include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/drivers/net/tap/rte_eth_tap.c b/drivers/net/tap/rte_eth_tap.c
new file mode 100644
index 0000000..9e0258b
--- /dev/null
+++ b/drivers/net/tap/rte_eth_tap.c
@@ -0,0 +1,754 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2016 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <rte_mbuf.h>
+#include <rte_ethdev.h>
+#include <rte_malloc.h>
+#include <rte_vdev.h>
+#include <rte_kvargs.h>
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/socket.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <unistd.h>
+#include <poll.h>
+#include <arpa/inet.h>
+#include <linux/if.h>
+#include <linux/if_tun.h>
+#include <linux/if_ether.h>
+#include <fcntl.h>
+
+/* Linux based path to the TUN device */
+#define TUN_TAP_DEV_PATH        "/dev/net/tun"
+/* Prefix of the default interface name; the probe routine appends a unit
+ * number to it.
+ */
+#define DEFAULT_TAP_NAME        "dtap"
+
+/* Keys accepted in the device argument string */
+#define ETH_TAP_IFACE_ARG       "iface"
+#define ETH_TAP_SPEED_ARG       "speed"
+
+/* Number of RX/TX queue pairs reserved for every Tap device */
+#define RTE_PMD_TAP_MAX_QUEUES	32
+
+/* NULL terminated list of keys handed to rte_kvargs_parse() */
+static const char *valid_arguments[] = {
+	ETH_TAP_IFACE_ARG,
+	ETH_TAP_SPEED_ARG,
+	NULL
+};
+
+/* Driver name reported through the device info and the ethdev data */
+static const char *drivername = "Tap PMD";
+/* Unit number used to build the next default interface name */
+static int tap_unit;
+
+/* Link settings copied into every new device; the probe routine overwrites
+ * link_speed before the device is created.
+ */
+static struct rte_eth_link pmd_link = {
+	.link_speed = ETH_SPEED_NUM_10G,
+	.link_duplex = ETH_LINK_FULL_DUPLEX,
+	.link_status = ETH_LINK_DOWN,
+	.link_autoneg = ETH_LINK_SPEED_AUTONEG
+};
+
+/* Packet and byte counters kept for each RX and TX queue */
+struct pkt_stats {
+	uint64_t opackets;		/* Number of output packets */
+	uint64_t ipackets;		/* Number of input packets */
+	uint64_t obytes;		/* Number of bytes on output */
+	uint64_t ibytes;		/* Number of bytes on input */
+	uint64_t errs;			/* Number of error packets */
+};
+
+/* State of one receive queue */
+struct rx_queue {
+	struct rte_mempool *mp;		/* Mempool for RX packets */
+	uint16_t in_port;		/* Port ID */
+	int fd;				/* Descriptor read by the RX burst */
+
+	struct pkt_stats stats;		/* Stats for this RX queue */
+};
+
+/* State of one transmit queue */
+struct tx_queue {
+	int fd;				/* Descriptor written by the TX burst */
+	struct pkt_stats stats;		/* Stats for this TX queue */
+};
+
+/* Private data of one Tap device, reachable through data->dev_private */
+struct pmd_internals {
+	char name[RTE_ETH_NAME_MAX_LEN];	/* Internal Tap device name */
+	uint16_t nb_queues;		/* Number of queues supported */
+	struct ether_addr eth_addr;	/* Mac address of the device port */
+
+	int if_index;			/* IF_INDEX for the port */
+	int fds[RTE_PMD_TAP_MAX_QUEUES]; /* List of all file descriptors */
+
+	struct rx_queue rxq[RTE_PMD_TAP_MAX_QUEUES];	/* List of RX queues */
+	struct tx_queue txq[RTE_PMD_TAP_MAX_QUEUES];	/* List of TX queues */
+};
+
+/* Tun/Tap allocation routine
+ *
+ * Opens the TUN/TAP device node, checks the features reported by the kernel,
+ * attaches the descriptor to a TAP interface and makes it non-blocking.
+ *
+ * name is the name of the interface to use; pass NULL or an empty string to
+ * take the host supplied name. When name is not NULL and the kernel reports
+ * a different interface name, that name is written back into name (bounded
+ * by RTE_ETH_NAME_MAX_LEN - 1 bytes), so the buffer must be writable.
+ *
+ * Returns the open file descriptor on success or -1 on failure.
+ */
+static int
+tun_alloc(char *name)
+{
+	struct ifreq ifr;
+	unsigned int features;
+	int fd;
+
+	memset(&ifr, 0, sizeof(struct ifreq));
+
+	ifr.ifr_flags = IFF_TAP | IFF_NO_PI;
+	/* snprintf() always terminates the string, which strncpy() with a
+	 * count of IFNAMSIZ does not; over-long names are truncated.
+	 */
+	if (name && name[0])
+		snprintf(ifr.ifr_name, IFNAMSIZ, "%s", name);
+
+	fd = open(TUN_TAP_DEV_PATH, O_RDWR);
+	if (fd < 0) {
+		RTE_LOG(ERR, PMD, "Unable to create TAP interface\n");
+		goto error;
+	}
+
+	/* Grab the TUN features to verify we can work */
+	if (ioctl(fd, TUNGETFEATURES, &features) < 0) {
+		RTE_LOG(ERR, PMD, "Unable to get TUN/TAP features\n");
+		goto error;
+	}
+	RTE_LOG(DEBUG, PMD, "TUN/TAP Features %08x\n", features);
+
+	if (!(features & IFF_MULTI_QUEUE) && (RTE_PMD_TAP_MAX_QUEUES > 1)) {
+		/* More than one queue is configured but not supported */
+		RTE_LOG(DEBUG, PMD, "TUN/TAP device only one queue\n");
+		goto error;
+	} else if ((features & IFF_ONE_QUEUE) &&
+			(RTE_PMD_TAP_MAX_QUEUES == 1)) {
+		ifr.ifr_flags |= IFF_ONE_QUEUE;
+		RTE_LOG(DEBUG, PMD, "Single queue only support\n");
+	} else {
+		ifr.ifr_flags |= IFF_MULTI_QUEUE;
+		RTE_LOG(DEBUG, PMD, "Multi-queue support for %d queues\n",
+			RTE_PMD_TAP_MAX_QUEUES);
+	}
+
+	/* Set the TUN/TAP configuration and get the name if needed */
+	if (ioctl(fd, TUNSETIFF, (void *)&ifr) < 0) {
+		RTE_LOG(ERR, PMD, "Unable to set TUNSETIFF for %s\n",
+			ifr.ifr_name);
+		perror("TUNSETIFF");
+		goto error;
+	}
+
+	/* Always set the file descriptor to non-blocking */
+	if (fcntl(fd, F_SETFL, O_NONBLOCK) < 0) {
+		RTE_LOG(ERR, PMD, "Unable to set to nonblocking\n");
+		perror("F_SETFL, NONBLOCK");
+		goto error;
+	}
+
+	/* If the kernel reports a different name, hand it back to the caller */
+	if (name && strcmp(name, ifr.ifr_name))
+		snprintf(name, RTE_ETH_NAME_MAX_LEN - 1, "%s", ifr.ifr_name);
+
+	return fd;
+
+error:
+	/* Zero is a valid descriptor, only a failed open() leaves fd negative */
+	if (fd >= 0)
+		close(fd);
+	return -1;
+}
+
+/* Receive burst callback for one queue of a multi-queue Tap device.
+ *
+ * For every slot of the burst an mbuf is taken from the queue's mempool and
+ * filled with one read() from the queue's file descriptor. The loop ends
+ * when nb_pkts packets were received, when no mbuf could be allocated or
+ * when read() returns nothing (the unused mbuf is then freed).
+ *
+ * Returns the number of mbufs stored in bufs.
+ */
+static uint16_t
+pmd_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
+{
+	struct rx_queue *rxq = queue;
+	unsigned long bytes = 0;
+	uint16_t count = 0;
+
+	while (count < nb_pkts) {
+		struct rte_mbuf *m;
+		int nread;
+
+		/* allocate the next mbuf */
+		m = rte_pktmbuf_alloc(rxq->mp);
+		if (unlikely(!m)) {
+			RTE_LOG(WARNING, PMD, "Unable to allocate mbuf\n");
+			break;
+		}
+
+		nread = read(rxq->fd, rte_pktmbuf_mtod(m, char *),
+			     rte_pktmbuf_tailroom(m));
+		if (nread <= 0) {
+			rte_pktmbuf_free(m);
+			break;
+		}
+
+		m->data_len = nread;
+		m->pkt_len = nread;
+		m->port = rxq->in_port;
+
+		/* account for the receive frame */
+		bufs[count++] = m;
+		bytes += m->pkt_len;
+	}
+
+	rxq->stats.ipackets += count;
+	rxq->stats.ibytes += bytes;
+
+	return count;
+}
+
+/* Transmit burst callback for one queue of the tap interface.
+ *
+ * A packet is written to the queue's file descriptor only after a zero
+ * timeout poll() reports the descriptor as writable. The loop ends as soon
+ * as poll() reports no event or write() does not accept the packet. Written
+ * mbufs are freed; the others are left untouched. Every packet of the burst
+ * that was not written is added to the queue's error counter.
+ *
+ * Returns the number of packets written.
+ */
+static uint16_t
+pmd_tx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
+{
+	struct tx_queue *txq = queue;
+	struct pollfd pfd;
+	unsigned long sent_bytes = 0;
+	uint16_t sent = 0;
+	int attempt;
+
+	if (unlikely(nb_pkts == 0))
+		return 0;
+
+	pfd.fd = txq->fd;
+	pfd.events = POLLOUT;
+
+	for (attempt = 0; attempt < nb_pkts; attempt++) {
+		struct rte_mbuf *m;
+		int rc;
+
+		rc = poll(&pfd, 1, 0);
+		if (rc <= 0)
+			break;
+
+		/* An event other than "writable" was reported, poll again */
+		if (!(pfd.revents & POLLOUT))
+			continue;
+
+		/* copy the tx frame data */
+		m = bufs[sent];
+		rc = write(pfd.fd, rte_pktmbuf_mtod(m, void*),
+			   rte_pktmbuf_pkt_len(m));
+		if (rc <= 0)
+			break;
+
+		sent++;
+		sent_bytes += m->pkt_len;
+		rte_pktmbuf_free(m);
+	}
+
+	txq->stats.opackets += sent;
+	txq->stats.errs += nb_pkts - sent;
+	txq->stats.obytes += sent_bytes;
+
+	return sent;
+}
+
+/* Start the port: the only action is to report the link as up.
+ * Always returns 0.
+ */
+static int
+tap_dev_start(struct rte_eth_dev *dev)
+{
+	struct rte_eth_link *link = &dev->data->dev_link;
+
+	/* Force the Link up */
+	link->link_status = ETH_LINK_UP;
+
+	return 0;
+}
+
+/* This function gets called when the current port gets stopped.
+ *
+ * Closes every descriptor recorded in the fds[] list and reports the link
+ * as down. Each closed descriptor is also marked as -1 in fds[] and in the
+ * matching RX/TX queue, so that the device removal and the queue release
+ * callbacks do not close the same descriptor number a second time.
+ */
+static void
+tap_dev_stop(struct rte_eth_dev *dev)
+{
+	int i;
+	struct pmd_internals *internals = dev->data->dev_private;
+
+	for (i = 0; i < internals->nb_queues; i++) {
+		if (internals->fds[i] == -1)
+			continue;
+
+		close(internals->fds[i]);
+
+		/* The queue pair shares the descriptor that was just closed */
+		internals->fds[i] = -1;
+		internals->rxq[i].fd = -1;
+		internals->txq[i].fd = -1;
+	}
+
+	dev->data->dev_link.link_status = ETH_LINK_DOWN;
+}
+
+/* Device configure callback: nothing is configured, always returns 0. */
+static int
+tap_dev_configure(struct rte_eth_dev *dev __rte_unused)
+{
+	return 0;
+}
+
+/* Fill dev_info with the fixed capabilities of a Tap device: one MAC
+ * address, a maximum RX packet length of ETHER_MAX_VLAN_FRAME_LEN and as
+ * many RX and TX queues as the device was created with.
+ */
+static void
+tap_dev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
+{
+	const struct pmd_internals *pmd = dev->data->dev_private;
+	const uint16_t nb_queues = pmd->nb_queues;
+
+	/* Identification */
+	dev_info->driver_name = drivername;
+	dev_info->if_index = pmd->if_index;
+	dev_info->pci_dev = NULL;
+
+	/* Limits */
+	dev_info->max_mac_addrs = 1;
+	dev_info->max_rx_pktlen = (uint32_t)ETHER_MAX_VLAN_FRAME_LEN;
+	dev_info->min_rx_bufsize = 0;
+	dev_info->max_rx_queues = nb_queues;
+	dev_info->max_tx_queues = nb_queues;
+}
+
+/* Report the statistics of the device.
+ *
+ * The device totals are the sum over all queues of the device. The
+ * per-queue arrays of tap_stats are only filled for queue indexes below
+ * RTE_ETHDEV_QUEUE_STAT_CNTRS; queues above that limit still contribute to
+ * the totals. The accumulators are 64 bits wide like the queue counters.
+ */
+static void
+tap_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *tap_stats)
+{
+	unsigned int i;
+	uint64_t rx_total = 0, tx_total = 0, tx_err_total = 0;
+	uint64_t rx_bytes_total = 0, tx_bytes_total = 0;
+	const struct pmd_internals *pmd = dev->data->dev_private;
+
+	for (i = 0; i < pmd->nb_queues; i++) {
+		const struct pkt_stats *rx = &pmd->rxq[i].stats;
+		const struct pkt_stats *tx = &pmd->txq[i].stats;
+
+		/* Only this many per-queue slots exist in rte_eth_stats */
+		if (i < RTE_ETHDEV_QUEUE_STAT_CNTRS) {
+			tap_stats->q_ipackets[i] = rx->ipackets;
+			tap_stats->q_ibytes[i] = rx->ibytes;
+			tap_stats->q_opackets[i] = tx->opackets;
+			tap_stats->q_errors[i] = tx->errs;
+			tap_stats->q_obytes[i] = tx->obytes;
+		}
+
+		rx_total += rx->ipackets;
+		rx_bytes_total += rx->ibytes;
+		tx_total += tx->opackets;
+		tx_err_total += tx->errs;
+		tx_bytes_total += tx->obytes;
+	}
+
+	tap_stats->ipackets = rx_total;
+	tap_stats->ibytes = rx_bytes_total;
+	tap_stats->opackets = tx_total;
+	tap_stats->oerrors = tx_err_total;
+	tap_stats->obytes = tx_bytes_total;
+}
+
+/* Clear the counters reported by tap_stats_get() for every queue: input
+ * packets/bytes of the RX queues and output packets/bytes/errors of the TX
+ * queues.
+ */
+static void
+tap_stats_reset(struct rte_eth_dev *dev)
+{
+	struct pmd_internals *pmd = dev->data->dev_private;
+	int q;
+
+	for (q = 0; q < pmd->nb_queues; q++) {
+		struct pkt_stats *rx = &pmd->rxq[q].stats;
+		struct pkt_stats *tx = &pmd->txq[q].stats;
+
+		rx->ipackets = 0;
+		rx->ibytes = 0;
+
+		tx->opackets = 0;
+		tx->errs = 0;
+		tx->obytes = 0;
+	}
+}
+
+/* Device close callback: intentionally empty, nothing is released here. */
+static void
+tap_dev_close(struct rte_eth_dev *dev __rte_unused)
+{
+}
+
+/* Release an RX queue by closing its file descriptor.
+ *
+ * NOTE(review): tap_setup_queue() stores the same descriptor in the RX and
+ * the TX queue of a pair and only this queue's copy is reset here - confirm
+ * the descriptor cannot be closed a second time through the TX queue.
+ */
+static void
+tap_rx_queue_release(void *queue)
+{
+	struct rx_queue *rxq = queue;
+
+	/* No queue, or a descriptor value that is not treated as open */
+	if (!rxq || rxq->fd <= 0)
+		return;
+
+	close(rxq->fd);
+	rxq->fd = -1;
+}
+
+/* Release a TX queue by closing its file descriptor.
+ *
+ * NOTE(review): tap_setup_queue() stores the same descriptor in the RX and
+ * the TX queue of a pair and only this queue's copy is reset here - confirm
+ * the descriptor cannot be closed a second time through the RX queue.
+ */
+static void
+tap_tx_queue_release(void *queue)
+{
+	struct tx_queue *txq = queue;
+
+	/* No queue, or a descriptor value that is not treated as open */
+	if (!txq || txq->fd <= 0)
+		return;
+
+	close(txq->fd);
+	txq->fd = -1;
+}
+
+/* Link update callback: the link state is not queried, always returns 0. */
+static int
+tap_link_update(struct rte_eth_dev *dev __rte_unused,
+		int wait_to_complete __rte_unused)
+{
+	return 0;
+}
+
+/* Attach the RX/TX queue pair qid to a file descriptor.
+ *
+ * A descriptor already stored in the RX queue, or failing that in the TX
+ * queue, is reused; otherwise a new one is obtained from tun_alloc(). Both
+ * queues of the pair end up sharing the descriptor and are published in the
+ * rx_queues/tx_queues arrays of the device.
+ *
+ * Returns the descriptor on success or -1 when tun_alloc() fails.
+ */
+static int
+tap_setup_queue(struct rte_eth_dev *dev,
+		struct pmd_internals *internals,
+		uint16_t qid)
+{
+	struct rx_queue *rx = &internals->rxq[qid];
+	struct tx_queue *tx = &internals->txq[qid];
+	int fd = (rx->fd < 0) ? tx->fd : rx->fd;
+
+	if (fd < 0) {
+		/* Neither queue has a descriptor yet, create one */
+		RTE_LOG(INFO, PMD, "Add queue to TAP %s for qid %d\n",
+			dev->data->name, qid);
+		fd = tun_alloc(dev->data->name);
+		if (fd < 0) {
+			RTE_LOG(ERR, PMD, "tun_alloc(%s) failed\n",
+				dev->data->name);
+			return -1;
+		}
+	}
+
+	rx->fd = fd;
+	tx->fd = fd;
+
+	dev->data->rx_queues[qid] = rx;
+	dev->data->tx_queues[qid] = tx;
+
+	return fd;
+}
+
+/* Set up the RX queue rx_queue_id.
+ *
+ * Records the mempool and port id in the queue, checks that an mbuf of the
+ * mempool can hold ETH_FRAME_LEN bytes behind the headroom and attaches the
+ * queue to a file descriptor through tap_setup_queue().
+ *
+ * Returns 0 on success, -1 for an invalid queue id, a NULL mempool or a
+ * descriptor failure, and -ENOMEM when the mbuf data room is too small.
+ */
+static int
+tap_rx_queue_setup(struct rte_eth_dev *dev,
+		   uint16_t rx_queue_id,
+		   uint16_t nb_rx_desc __rte_unused,
+		   unsigned int socket_id __rte_unused,
+		   const struct rte_eth_rxconf *rx_conf __rte_unused,
+		   struct rte_mempool *mp)
+{
+	struct pmd_internals *pmd = dev->data->dev_private;
+	struct rx_queue *rxq;
+	uint16_t room;
+	int fd;
+
+	if (!mp || (rx_queue_id >= pmd->nb_queues)) {
+		RTE_LOG(ERR, PMD, "nb_queues %d mp %p\n",
+			pmd->nb_queues, mp);
+		return -1;
+	}
+
+	rxq = &pmd->rxq[rx_queue_id];
+	rxq->mp = mp;
+	rxq->in_port = dev->data->port_id;
+
+	/* Now get the space available for data in the mbuf */
+	room = (uint16_t)(rte_pktmbuf_data_room_size(mp) -
+				RTE_PKTMBUF_HEADROOM);
+	if (room < ETH_FRAME_LEN) {
+		RTE_LOG(ERR, PMD,
+			"%s: %d bytes will not fit in mbuf (%d bytes)\n",
+			dev->data->name, ETH_FRAME_LEN, room);
+		return -ENOMEM;
+	}
+
+	fd = tap_setup_queue(dev, pmd, rx_queue_id);
+	if (fd == -1)
+		return -1;
+
+	pmd->fds[rx_queue_id] = fd;
+	RTE_LOG(INFO, PMD, "RX TAP device name %s, qid %d on fd %d\n",
+		dev->data->name, rx_queue_id, rxq->fd);
+
+	return 0;
+}
+
+/* Set up the TX queue tx_queue_id by attaching it to a file descriptor
+ * through tap_setup_queue().
+ *
+ * Returns 0 on success and -1 for an invalid queue id or when no descriptor
+ * could be obtained. The descriptor itself is not returned: like the RX
+ * queue setup, success is reported as zero.
+ */
+static int
+tap_tx_queue_setup(struct rte_eth_dev *dev,
+		   uint16_t tx_queue_id,
+		   uint16_t nb_tx_desc __rte_unused,
+		   unsigned int socket_id __rte_unused,
+		   const struct rte_eth_txconf *tx_conf __rte_unused)
+{
+	struct pmd_internals *internals = dev->data->dev_private;
+	int fd;
+
+	if (tx_queue_id >= internals->nb_queues)
+		return -1;
+
+	fd = tap_setup_queue(dev, internals, tx_queue_id);
+	if (fd < 0)
+		return -1;
+
+	RTE_LOG(INFO, PMD, "TX TAP device name %s, qid %d on fd %d\n",
+		dev->data->name, tx_queue_id, internals->txq[tx_queue_id].fd);
+
+	return 0;
+}
+
+/* Ethdev operations implemented by the Tap PMD */
+static const struct eth_dev_ops ops = {
+	.dev_start              = tap_dev_start,
+	.dev_stop               = tap_dev_stop,
+	.dev_close              = tap_dev_close,
+	.dev_configure          = tap_dev_configure,
+	.dev_infos_get          = tap_dev_info,
+	.rx_queue_setup         = tap_rx_queue_setup,
+	.tx_queue_setup         = tap_tx_queue_setup,
+	.rx_queue_release       = tap_rx_queue_release,
+	.tx_queue_release       = tap_tx_queue_release,
+	.link_update            = tap_link_update,
+	.stats_get              = tap_stats_get,
+	.stats_reset            = tap_stats_reset,
+};
+
+/* Assign the MAC addresses used by a Tap device.
+ *
+ * The host side of the interface behind fd is given the address
+ * 'T' 'a' 'p' '-' <port id> <numa node>; the application side address
+ * 'd' 'n' 'e' 't' <port id> <numa node> is stored in addr.
+ *
+ * Returns 0 on success, -1 on invalid arguments or when an ioctl fails.
+ */
+static int
+pmd_mac_address(int fd, struct rte_eth_dev *dev, struct ether_addr *addr)
+{
+	struct ifreq ifr;
+	char *mac = ifr.ifr_hwaddr.sa_data;
+
+	if (!dev || !addr || (fd <= 0))
+		return -1;
+
+	memset(&ifr, 0, sizeof(ifr));
+
+	if (ioctl(fd, SIOCGIFHWADDR, &ifr) == -1) {
+		RTE_LOG(ERR, PMD, "ioctl failed (SIOCGIFHWADDR) (%s)\n",
+			ifr.ifr_name);
+		return -1;
+	}
+
+	/* Set the host based MAC address to this special MAC format */
+	mac[0] = 'T';
+	mac[1] = 'a';
+	mac[2] = 'p';
+	mac[3] = '-';
+	mac[4] = dev->data->port_id;
+	mac[5] = dev->data->numa_node;
+	if (ioctl(fd, SIOCSIFHWADDR, &ifr) == -1) {
+		RTE_LOG(ERR, PMD, "%s: ioctl failed (SIOCSIFHWADDR) (%s)\n",
+			dev->data->name, ifr.ifr_name);
+		return -1;
+	}
+
+	/* Set the local application MAC address, needs to be different then
+	 * the host based MAC address.
+	 */
+	mac[0] = 'd';
+	mac[1] = 'n';
+	mac[2] = 'e';
+	mac[3] = 't';
+	mac[4] = dev->data->port_id;
+	mac[5] = dev->data->numa_node;
+	rte_memcpy(addr, mac, ETH_ALEN);
+
+	return 0;
+}
+
+/* Create the ethdev for a Tap device.
+ *
+ * name is the ethdev name and tap_name the requested host interface name;
+ * tap_name may be rewritten by tun_alloc() with the name chosen by the host.
+ * The ethdev data and the private data are allocated on the NUMA node of the
+ * calling core, the first Tap descriptor is opened and placed in queue 0 and
+ * the MAC addresses are assigned.
+ *
+ * Returns 0 on success or -EINVAL on failure; on failure the descriptor, the
+ * allocations and the ethdev port obtained so far are released.
+ */
+static int
+eth_dev_tap_create(const char *name, char *tap_name)
+{
+	int numa_node = rte_socket_id();
+	struct rte_eth_dev *dev = NULL;
+	struct pmd_internals *pmd = NULL;
+	struct rte_eth_dev_data *data = NULL;
+	int i, fd = -1;
+
+	RTE_LOG(INFO, PMD,
+		"%s: Create TAP Ethernet device with %d queues on numa %u\n",
+		 name, RTE_PMD_TAP_MAX_QUEUES, rte_socket_id());
+
+	data = rte_zmalloc_socket(tap_name, sizeof(*data), 0, numa_node);
+	if (!data)
+		goto error_exit;
+
+	pmd = rte_zmalloc_socket(tap_name, sizeof(*pmd), 0, numa_node);
+	if (!pmd)
+		goto error_exit;
+
+	/* Use the name and not the tap_name */
+	dev = rte_eth_dev_allocate(name);
+	if (!dev)
+		goto error_exit;
+
+	snprintf(pmd->name, sizeof(pmd->name), "%s", tap_name);
+
+	pmd->nb_queues = RTE_PMD_TAP_MAX_QUEUES;
+
+	/* Setup some default values */
+	data->dev_private = pmd;
+	data->port_id = dev->data->port_id;
+	data->dev_flags = RTE_ETH_DEV_DETACHABLE;
+	data->kdrv = RTE_KDRV_NONE;
+	data->drv_name = drivername;
+	data->numa_node = numa_node;
+
+	data->dev_link = pmd_link;
+	data->mac_addrs = &pmd->eth_addr;
+	data->nb_rx_queues = pmd->nb_queues;
+	data->nb_tx_queues = pmd->nb_queues;
+
+	dev->data = data;
+	dev->dev_ops = &ops;
+	dev->driver = NULL;
+	dev->rx_pkt_burst = pmd_rx_burst;
+	dev->tx_pkt_burst = pmd_tx_burst;
+	snprintf(dev->data->name, sizeof(dev->data->name), "%s", name);
+
+	/* Create the first Tap device */
+	fd = tun_alloc(tap_name);
+	if (fd < 0)
+		goto error_exit;
+
+	/* Presetup the fds to -1 as being not working */
+	for (i = 0; i < RTE_PMD_TAP_MAX_QUEUES; i++) {
+		pmd->fds[i] = -1;
+		pmd->rxq[i].fd = -1;
+		pmd->txq[i].fd = -1;
+	}
+
+	/* Take the TUN/TAP fd and place in the first location */
+	pmd->rxq[0].fd = fd;
+	pmd->txq[0].fd = fd;
+	pmd->fds[0] = fd;
+
+	if (pmd_mac_address(fd, dev, &pmd->eth_addr) < 0)
+		goto error_exit;
+
+	return 0;
+
+error_exit:
+	RTE_PMD_DEBUG_TRACE("Unable to initialize %s\n", name);
+
+	/* Do not leak the Tap descriptor when a later step failed */
+	if (fd >= 0)
+		close(fd);
+
+	rte_free(data);
+	rte_free(pmd);
+
+	/* dev is still NULL when one of the allocations above failed */
+	if (dev)
+		rte_eth_dev_release_port(dev);
+
+	return -EINVAL;
+}
+
+/* kvargs handler for the "iface" key.
+ *
+ * Copies value into the name buffer passed through extra_args; without a
+ * value the default name is rebuilt from DEFAULT_TAP_NAME and the unit
+ * number preceding the current tap_unit. Always returns 0.
+ */
+static int
+set_interface_name(const char *key __rte_unused,
+		   const char *value,
+		   void *extra_args)
+{
+	char *iface = extra_args;
+
+	if (!value) {
+		snprintf(iface, RTE_ETH_NAME_MAX_LEN - 1, "%s%d",
+			 DEFAULT_TAP_NAME, (tap_unit - 1));
+		return 0;
+	}
+
+	snprintf(iface, RTE_ETH_NAME_MAX_LEN - 1, "%s", value);
+
+	return 0;
+}
+
+/* kvargs handler for the "speed" key.
+ *
+ * Stores the value converted with atoi() in the int passed through
+ * extra_args, or ETH_SPEED_NUM_10G when no value is given. Always returns 0.
+ */
+static int
+set_interface_speed(const char *key __rte_unused,
+		    const char *value,
+		    void *extra_args)
+{
+	int *speed = extra_args;
+
+	if (value)
+		*speed = atoi(value);
+	else
+		*speed = ETH_SPEED_NUM_10G;
+
+	return 0;
+}
+
+/* Open a TAP interface device.
+ *
+ * Builds the default interface name from DEFAULT_TAP_NAME and the next unit
+ * number, applies the optional "speed" and "iface" arguments found in params
+ * and creates the ethdev called name.
+ *
+ * Returns the result of the device creation, or the negative value reported
+ * while processing the arguments. On any failure the unit number is
+ * restored so that it can be used by the next probe.
+ */
+static int
+rte_pmd_tap_probe(const char *name, const char *params)
+{
+	int ret;
+	struct rte_kvargs *kvlist = NULL;
+	int speed;
+	char tap_name[RTE_ETH_NAME_MAX_LEN];
+
+	speed = ETH_SPEED_NUM_10G;
+	snprintf(tap_name, sizeof(tap_name), "%s%d",
+		 DEFAULT_TAP_NAME, tap_unit++);
+
+	RTE_LOG(INFO, PMD, "Initializing pmd_tap for %s as %s\n",
+		name, tap_name);
+
+	if (params && (params[0] != '\0')) {
+		RTE_LOG(INFO, PMD, "paramaters (%s)\n", params);
+
+		kvlist = rte_kvargs_parse(params, valid_arguments);
+		if (kvlist) {
+			if (rte_kvargs_count(kvlist, ETH_TAP_SPEED_ARG) == 1) {
+				ret = rte_kvargs_process(kvlist,
+							 ETH_TAP_SPEED_ARG,
+							 &set_interface_speed,
+							 &speed);
+				if (ret < 0)
+					goto leave;
+			}
+
+			if (rte_kvargs_count(kvlist, ETH_TAP_IFACE_ARG) == 1) {
+				ret = rte_kvargs_process(kvlist,
+							 ETH_TAP_IFACE_ARG,
+							 &set_interface_name,
+							 tap_name);
+				if (ret < 0)
+					goto leave;
+			}
+		}
+	}
+	pmd_link.link_speed = speed;
+
+	ret = eth_dev_tap_create(name, tap_name);
+
+leave:
+	/* The device creation reports failures as -EINVAL, so every negative
+	 * value has to be treated as an error, not only -1.
+	 */
+	if (ret < 0) {
+		RTE_LOG(INFO, PMD, "Failed to create pmd_tap for %s as %s\n",
+			name, tap_name);
+		tap_unit--;		/* Restore the unit number */
+	}
+	rte_kvargs_free(kvlist);
+
+	return ret;
+}
+
+/* Detach a TAP device.
+ *
+ * Looks up the ethdev called name, closes every descriptor recorded in its
+ * fds[] list, frees the private and ethdev data and releases the port.
+ * Returns 0, also when no such device exists.
+ */
+static int
+rte_pmd_tap_remove(const char *name)
+{
+	struct rte_eth_dev *eth_dev;
+	struct pmd_internals *pmd;
+	int q;
+
+	RTE_LOG(INFO, PMD, "Closing TUN/TAP Ethernet device on numa %u\n",
+		rte_socket_id());
+
+	/* find the ethdev entry */
+	eth_dev = rte_eth_dev_allocated(name);
+	if (eth_dev == NULL)
+		return 0;
+
+	pmd = eth_dev->data->dev_private;
+	for (q = 0; q < pmd->nb_queues; q++) {
+		if (pmd->fds[q] == -1)
+			continue;
+		close(pmd->fds[q]);
+	}
+
+	rte_free(pmd);
+	rte_free(eth_dev->data);
+
+	rte_eth_dev_release_port(eth_dev);
+
+	return 0;
+}
+
+/* Virtual device driver hooks of the Tap PMD */
+static struct rte_vdev_driver pmd_tap_drv = {
+	.probe = rte_pmd_tap_probe,
+	.remove = rte_pmd_tap_remove,
+};
+
+/* Register the driver as net_tap together with its argument string */
+DRIVER_REGISTER_VDEV(net_tap, pmd_tap_drv);
+DRIVER_REGISTER_PARAM_STRING(net_tap, "iface=<string>,speed=N");
diff --git a/drivers/net/tap/rte_pmd_tap_version.map b/drivers/net/tap/rte_pmd_tap_version.map
new file mode 100644
index 0000000..61463bf
--- /dev/null
+++ b/drivers/net/tap/rte_pmd_tap_version.map
@@ -0,0 +1,4 @@
+DPDK_16.11 {
+
+	local: *;
+};
diff --git a/mk/rte.app.mk b/mk/rte.app.mk
index ac50a21..40d16f7 100644
--- a/mk/rte.app.mk
+++ b/mk/rte.app.mk
@@ -123,6 +123,7 @@ _LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_PCAP)       += -lrte_pmd_pcap -lpcap
 _LDLIBS-$(CONFIG_RTE_LIBRTE_QEDE_PMD)       += -lrte_pmd_qede -lz
 _LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_RING)       += -lrte_pmd_ring
 _LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_SZEDATA2)   += -lrte_pmd_szedata2 -lsze2
+_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_TAP)        += -lrte_pmd_tap
 _LDLIBS-$(CONFIG_RTE_LIBRTE_THUNDERX_NICVF_PMD) += -lrte_pmd_thunderx_nicvf -lm
 _LDLIBS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD)     += -lrte_pmd_virtio
 ifeq ($(CONFIG_RTE_LIBRTE_VHOST),y)
-- 
2.8.0.GIT

^ permalink raw reply related	[flat|nested] 59+ messages in thread

* [PATCH v8] drivers/net:new PMD using tun/tap host interface
  2016-09-21  2:00   ` [PATCH v3] drivers/net:new PMD using tun/tap host interface Keith Wiles
                       ` (2 preceding siblings ...)
  2016-10-13 15:36     ` [PATCH v7] " Keith Wiles
@ 2016-10-13 16:11     ` Keith Wiles
  2016-10-13 16:33       ` Mcnamara, John
  2016-10-13 22:03     ` [PATCH v9] " Keith Wiles
  4 siblings, 1 reply; 59+ messages in thread
From: Keith Wiles @ 2016-10-13 16:11 UTC (permalink / raw)
  To: dev; +Cc: pmatilai, yuanhan.liu, ferruh.yigit

The rte_eth_tap.c PMD creates a device using TUN/TAP interfaces
on the local host. The PMD allows for DPDK and the host to
communicate using a raw device interface on the host and in
the DPDK application. The device created is a Tap device with
a L2 packet header.

v8 - Fix issue with tap_tx_queue_setup() not returning zero on success.
v7 - Reword the comment in common_base and fix the data->name issue
v6 - fixed the checkpatch issues
v5 - merge in changes from list review see related emails
     fixed many minor edits
v4 - merge with latest driver changes
v3 - fix includes by removing ifdef for other type besides Linux
     Fix the copyright notice in the Makefile
v2 - merge all of the patches into one patch
     Fix a typo on naming the tap device
     Update the maintainers list

Signed-off-by: Keith Wiles <keith.wiles@intel.com>
---
 MAINTAINERS                             |   5 +
 config/common_base                      |   9 +
 config/common_linuxapp                  |   1 +
 doc/guides/nics/tap.rst                 | 138 ++++++
 drivers/net/Makefile                    |   1 +
 drivers/net/tap/Makefile                |  57 +++
 drivers/net/tap/rte_eth_tap.c           | 756 ++++++++++++++++++++++++++++++++
 drivers/net/tap/rte_pmd_tap_version.map |   4 +
 mk/rte.app.mk                           |   1 +
 9 files changed, 972 insertions(+)
 create mode 100644 doc/guides/nics/tap.rst
 create mode 100644 drivers/net/tap/Makefile
 create mode 100644 drivers/net/tap/rte_eth_tap.c
 create mode 100644 drivers/net/tap/rte_pmd_tap_version.map

diff --git a/MAINTAINERS b/MAINTAINERS
index cd8d167..f905709 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -394,6 +394,11 @@ F: doc/guides/nics/pcap_ring.rst
 F: app/test/test_pmd_ring.c
 F: app/test/test_pmd_ring_perf.c
 
+Tap PMD
+M: Keith Wiles <keith.wiles@intel.com>
+F: drivers/net/tap
+F: doc/guides/nics/tap.rst
+
 Null Networking PMD
 M: Tetsuya Mukawa <mtetsuyah@gmail.com>
 F: drivers/net/null/
diff --git a/config/common_base b/config/common_base
index f5d2eff..47ef843 100644
--- a/config/common_base
+++ b/config/common_base
@@ -592,3 +592,12 @@ CONFIG_RTE_APP_TEST_RESOURCE_TAR=n
 CONFIG_RTE_TEST_PMD=y
 CONFIG_RTE_TEST_PMD_RECORD_CORE_CYCLES=n
 CONFIG_RTE_TEST_PMD_RECORD_BURST_STATS=n
+
+#
+# Compile the TAP PMD
+#
+# The TAP PMD is currently only built for Linux and the
+# option is enabled by default in common_linuxapp file,
+# set to 'n' in the common_base file.
+#
+CONFIG_RTE_LIBRTE_PMD_TAP=n
diff --git a/config/common_linuxapp b/config/common_linuxapp
index 2483dfa..782b503 100644
--- a/config/common_linuxapp
+++ b/config/common_linuxapp
@@ -44,3 +44,4 @@ CONFIG_RTE_LIBRTE_PMD_VHOST=y
 CONFIG_RTE_LIBRTE_PMD_AF_PACKET=y
 CONFIG_RTE_LIBRTE_POWER=y
 CONFIG_RTE_VIRTIO_USER=y
+CONFIG_RTE_LIBRTE_PMD_TAP=y
diff --git a/doc/guides/nics/tap.rst b/doc/guides/nics/tap.rst
new file mode 100644
index 0000000..bffc649
--- /dev/null
+++ b/doc/guides/nics/tap.rst
@@ -0,0 +1,138 @@
+..  BSD LICENSE
+    Copyright(c) 2016 Intel Corporation. All rights reserved.
+    All rights reserved.
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions
+    are met:
+
+    * Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+    notice, this list of conditions and the following disclaimer in
+    the documentation and/or other materials provided with the
+    distribution.
+    * Neither the name of Intel Corporation nor the names of its
+    contributors may be used to endorse or promote products derived
+    from this software without specific prior written permission.
+
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+Tun/Tap Poll Mode Driver
+========================================
+
+The rte_eth_tap.c PMD creates a device using TUN/TAP interfaces on the local
+host. The PMD allows for DPDK and the host to communicate using a raw device
+interface on the host and in the DPDK application.
+
+The device created is a TAP device, which sends/receives packets in a raw format
+with an L2 header. The TAP PMD is used for connectivity to the local host
+using a TAP interface. When the TAP PMD is initialized it will create a number
+of tap devices in the host, which can be accessed via the 'ifconfig -a' or 'ip'
+commands. These commands can be used to configure and query the virtual device.
+
+These TAP interfaces can be used with wireshark, tcpdump or Pktgen-DPDK, and
+can also be used as a network connection to the DPDK application. The
+method to enable one or more interfaces is to use the --vdev=net_tap option on
+the DPDK application command line. Each --vdev=net_tap option given will create
+an interface named dtap0, dtap1, ... and so forth.
+
+.. code-block:: console
+
+   The interface name can be changed by adding the iface=foo0 argument
+   e.g. --vdev=net_tap,iface=foo0 --vdev=net_tap,iface=foo1, ...
+
+.. code-block:: console
+
+   Also the speed of the interface can be changed from 10G to whatever number
+   needed, but the interface does not enforce that speed.
+   e.g. --vdev=net_tap,iface=foo0,speed=25000
+
+After the DPDK application is started you can send and receive packets on the
+interface using the standard rx_burst/tx_burst APIs in DPDK. From the host point
+of view you can use any host tool like tcpdump, wireshark, ping, Pktgen and
+others to communicate with the DPDK application. The DPDK application may not
+understand network protocols like IPv4/6, UDP or TCP unless the application has
+been written to understand these protocols.
+
+If you need the interface to act as a real network interface, meaning it is
+running and has a valid IP address, you can do this with the following commands:
+
+.. code-block:: console
+
+   sudo ip link set dtap0 up; sudo ip addr add 192.168.0.250/24 dev dtap0
+   sudo ip link set dtap1 up; sudo ip addr add 192.168.1.250/24 dev dtap1
+
+Please change the IP addresses as you see fit.
+
+If routing is enabled on the host you can also communicate with the DPDK App
+over the internet via a standard socket layer application as long as you account
+for the protocol handling in the application.
+
+If you have a Network Stack in your DPDK application or something like it you
+can utilize that stack to handle the network protocols. Plus you would be able
+to address the interface using an IP address assigned to the internal interface.
+
+For a very crude test you can do the following:
+
+Apply the patch below and make sure you have socat installed on your system.
+
+Build DPDK, then pull down Pktgen and build pktgen using the DPDK SDK/Target
+used to build the dpdk you pulled down.
+
+Run pktgen from the pktgen repo directory in an xterm:
+    Note: change the -b options to blacklist all of your physical ports. The
+          following command line is all one line.
+
+.. code-block:: console
+
+    sudo ./app/app/x86_64-native-linuxapp-gcc/app/pktgen -l 1-5 -n 4        \
+     --proc-type auto --log-level 8 --socket-mem 512,512 --file-prefix pg   \
+     --vdev=net_tap --vdev=net_tap -b 05:00.0 -b 05:00.1                    \
+     -b 04:00.0 -b 04:00.1 -b 04:00.2 -b 04:00.3                            \
+     -b 81:00.0 -b 81:00.1 -b 81:00.2 -b 81:00.3                            \
+      -b 82:00.0 -b 83:00.0 -- -T -P -m [2:3].0 -m [4:5].1                  \
+     -f themes/black-yellow.theme
+
+I normally put the line above into a file called doit.sh, just to allow for a
+simple execution of the line above.
+
+You can leave the -f themes/black-yellow.theme off if the colors do not work
+for your system configuration.
+
+Verify with the 'ifconfig -a' command in a different xterm window; you should
+see that the dtap0 and dtap1 interfaces have been created.
+
+Next set the links for the two interfaces to up via the commands below.
+
+.. code-block:: console
+
+    sudo ip link set dtap0 up; sudo ip addr add 192.168.0.250/24 dev dtap0
+    sudo ip link set dtap1 up; sudo ip addr add 192.168.1.250/24 dev dtap1
+
+Then use socat to create a loopback for the two interfaces.
+
+.. code-block:: console
+
+    sudo socat interface:dtap0 interface:dtap1
+
+Then on the Pktgen command line interface you can start sending packets using
+the commands 'start 0' and 'start 1' or you can start both at the same time
+with 'start all'. The command 'str' is an alias for 'start all' and 'stp' is
+an alias for 'stop all'.
+
+While running you should see the 64 byte counters increasing to verify the
+traffic is being looped back. You can use 'set all size XXX' to change the
+size of the packets after you stop the traffic. Use the pktgen 'help' command
+to see a list of all commands. You can also use the '-f' option to load commands
+at startup.
diff --git a/drivers/net/Makefile b/drivers/net/Makefile
index bc93230..e366a85 100644
--- a/drivers/net/Makefile
+++ b/drivers/net/Makefile
@@ -51,6 +51,7 @@ DIRS-$(CONFIG_RTE_LIBRTE_PMD_PCAP) += pcap
 DIRS-$(CONFIG_RTE_LIBRTE_QEDE_PMD) += qede
 DIRS-$(CONFIG_RTE_LIBRTE_PMD_RING) += ring
 DIRS-$(CONFIG_RTE_LIBRTE_PMD_SZEDATA2) += szedata2
+DIRS-$(CONFIG_RTE_LIBRTE_PMD_TAP) += tap
 DIRS-$(CONFIG_RTE_LIBRTE_THUNDERX_NICVF_PMD) += thunderx
 DIRS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio
 DIRS-$(CONFIG_RTE_LIBRTE_VMXNET3_PMD) += vmxnet3
diff --git a/drivers/net/tap/Makefile b/drivers/net/tap/Makefile
new file mode 100644
index 0000000..e18f30c
--- /dev/null
+++ b/drivers/net/tap/Makefile
@@ -0,0 +1,57 @@
+#   BSD LICENSE
+#
+#   Copyright(c) 2016 Intel Corporation. All rights reserved.
+#
+#   Redistribution and use in source and binary forms, with or without
+#   modification, are permitted provided that the following conditions
+#   are met:
+#
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice, this list of conditions and the following disclaimer in
+#       the documentation and/or other materials provided with the
+#       distribution.
+#     * Neither the name of Intel Corporation nor the names of its
+#       contributors may be used to endorse or promote products derived
+#       from this software without specific prior written permission.
+#
+#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+#   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+#   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+#   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+#   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+#
+# library name
+#
+LIB = librte_pmd_tap.a
+
+EXPORT_MAP := rte_pmd_tap_version.map
+
+LIBABIVER := 1
+
+CFLAGS += -O3
+CFLAGS += $(WERROR_FLAGS)
+
+#
+# all source are stored in SRCS-y
+#
+SRCS-$(CONFIG_RTE_LIBRTE_PMD_TAP) += rte_eth_tap.c
+
+# this lib depends upon:
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_TAP) += lib/librte_eal
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_TAP) += lib/librte_mbuf
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_TAP) += lib/librte_mempool
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_TAP) += lib/librte_ether
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_TAP) += lib/librte_kvargs
+
+include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/drivers/net/tap/rte_eth_tap.c b/drivers/net/tap/rte_eth_tap.c
new file mode 100644
index 0000000..7f303db
--- /dev/null
+++ b/drivers/net/tap/rte_eth_tap.c
@@ -0,0 +1,756 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2016 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <rte_mbuf.h>
+#include <rte_ethdev.h>
+#include <rte_malloc.h>
+#include <rte_vdev.h>
+#include <rte_kvargs.h>
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/socket.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <unistd.h>
+#include <poll.h>
+#include <arpa/inet.h>
+#include <linux/if.h>
+#include <linux/if_tun.h>
+#include <linux/if_ether.h>
+#include <fcntl.h>
+
+/* Linux based path to the TUN device */
+#define TUN_TAP_DEV_PATH        "/dev/net/tun"
+/* Prefix of the default interface name; the probe routine appends tap_unit */
+#define DEFAULT_TAP_NAME        "dtap"
+
+/* Keys accepted in the vdev argument string */
+#define ETH_TAP_IFACE_ARG       "iface"
+#define ETH_TAP_SPEED_ARG       "speed"
+
+/* Size of the fd and RX/TX queue arrays in struct pmd_internals */
+#define RTE_PMD_TAP_MAX_QUEUES	32
+
+/* NULL terminated key list handed to rte_kvargs_parse() */
+static const char *valid_arguments[] = {
+	ETH_TAP_IFACE_ARG,
+	ETH_TAP_SPEED_ARG,
+	NULL
+};
+
+/* Driver name stored in the device data and reported by the info callback */
+static const char *drivername = "Tap PMD";
+/* Unit number used for default interface names; incremented by each probe */
+static int tap_unit;
+
+/* Link template copied into every new port. link_speed is overwritten by the
+ * probe routine before a port is created.
+ *
+ * NOTE(review): ETH_LINK_SPEED_AUTONEG looks like a link_speeds capability
+ * flag rather than a link_autoneg value - confirm against rte_ethdev.h.
+ */
+static struct rte_eth_link pmd_link = {
+	.link_speed = ETH_SPEED_NUM_10G,
+	.link_duplex = ETH_LINK_FULL_DUPLEX,
+	.link_status = ETH_LINK_DOWN,
+	.link_autoneg = ETH_LINK_SPEED_AUTONEG
+};
+
+/* Software counters kept per queue; the RX burst updates ipackets/ibytes,
+ * the TX burst updates opackets/obytes/errs.
+ */
+struct pkt_stats {
+	uint64_t opackets;		/* Number of output packets */
+	uint64_t ipackets;		/* Number of input packets */
+	uint64_t obytes;		/* Number of bytes on output */
+	uint64_t ibytes;		/* Number of bytes on input */
+	uint64_t errs;			/* Number of error packets */
+};
+
+/* State of one RX queue, passed to the RX burst callback as "queue" */
+struct rx_queue {
+	struct rte_mempool *mp;		/* Mempool for RX packets */
+	uint16_t in_port;		/* Port ID */
+	int fd;				/* Descriptor read by RX burst, -1 if unset */
+
+	struct pkt_stats stats;		/* Stats for this RX queue */
+};
+
+/* State of one TX queue, passed to the TX burst callback as "queue" */
+struct tx_queue {
+	int fd;				/* Descriptor written by TX burst, -1 if unset */
+	struct pkt_stats stats;		/* Stats for this TX queue */
+};
+
+/* Private data of one tap port, stored in dev->data->dev_private */
+struct pmd_internals {
+	char name[RTE_ETH_NAME_MAX_LEN];	/* Internal Tap device name */
+	uint16_t nb_queues;		/* Number of queues supported */
+	struct ether_addr eth_addr;	/* Mac address of the device port */
+
+	/* NOTE(review): if_index is reported by the info callback but is not
+	 * assigned anywhere in this file, so it keeps its zeroed value.
+	 */
+	int if_index;			/* IF_INDEX for the port */
+	int fds[RTE_PMD_TAP_MAX_QUEUES]; /* List of all file descriptors */
+
+	struct rx_queue rxq[RTE_PMD_TAP_MAX_QUEUES];	/* List of RX queues */
+	struct tx_queue txq[RTE_PMD_TAP_MAX_QUEUES];	/* List of TX queues */
+};
+
+/* Tun/Tap allocation routine
+ *
+ * Opens TUN_TAP_DEV_PATH, checks the TUN/TAP feature flags, attaches the
+ * descriptor to a TAP interface without packet information header and
+ * switches the descriptor to non-blocking mode.
+ *
+ * name is the name of the interface to use; NULL or an empty string lets the
+ * host supply the name. When the host reports a different name it is written
+ * back into name (bounded by RTE_ETH_NAME_MAX_LEN - 1 bytes).
+ *
+ * Returns the file descriptor on success, -1 on failure.
+ */
+static int
+tun_alloc(char *name)
+{
+	struct ifreq ifr;
+	unsigned int features;
+	int fd;
+
+	memset(&ifr, 0, sizeof(struct ifreq));
+
+	ifr.ifr_flags = IFF_TAP | IFF_NO_PI;
+	/* snprintf() always terminates; strncpy() does not for long names */
+	if (name && name[0])
+		snprintf(ifr.ifr_name, IFNAMSIZ, "%s", name);
+
+	fd = open(TUN_TAP_DEV_PATH, O_RDWR);
+	if (fd < 0) {
+		RTE_LOG(ERR, PMD, "Unable to create TAP interface\n");
+		goto error;
+	}
+
+	/* Grab the TUN features to verify we can work */
+	if (ioctl(fd, TUNGETFEATURES, &features) < 0) {
+		RTE_LOG(ERR, PMD, "Unable to get TUN/TAP features\n");
+		goto error;
+	}
+	RTE_LOG(DEBUG, PMD, "TUN/TAP Features %08x\n", features);
+
+	if (!(features & IFF_MULTI_QUEUE) && (RTE_PMD_TAP_MAX_QUEUES > 1)) {
+		RTE_LOG(DEBUG, PMD, "TUN/TAP device only one queue\n");
+		goto error;
+	} else if ((features & IFF_ONE_QUEUE) &&
+			(RTE_PMD_TAP_MAX_QUEUES == 1)) {
+		ifr.ifr_flags |= IFF_ONE_QUEUE;
+		RTE_LOG(DEBUG, PMD, "Single queue only support\n");
+	} else {
+		ifr.ifr_flags |= IFF_MULTI_QUEUE;
+		RTE_LOG(DEBUG, PMD, "Multi-queue support for %d queues\n",
+			RTE_PMD_TAP_MAX_QUEUES);
+	}
+
+	/* Set the TUN/TAP configuration and get the name if needed */
+	if (ioctl(fd, TUNSETIFF, (void *)&ifr) < 0) {
+		RTE_LOG(ERR, PMD, "Unable to set TUNSETIFF for %s\n",
+			ifr.ifr_name);
+		perror("TUNSETIFF");
+		goto error;
+	}
+
+	/* Always set the file descriptor to non-blocking */
+	if (fcntl(fd, F_SETFL, O_NONBLOCK) < 0) {
+		RTE_LOG(ERR, PMD, "Unable to set to nonblocking\n");
+		perror("F_SETFL, NONBLOCK");
+		goto error;
+	}
+
+	/* If the host reported a different name, hand it back to the caller */
+	if (name && strcmp(name, ifr.ifr_name))
+		snprintf(name, RTE_ETH_NAME_MAX_LEN - 1, "%s", ifr.ifr_name);
+
+	return fd;
+
+error:
+	/* descriptor 0 is valid too, only a negative value means "not open" */
+	if (fd >= 0)
+		close(fd);
+	return -1;
+}
+
+/* RX burst callback: store up to nb_pkts frames, read from the file
+ * descriptor of the given RX queue, into bufs.
+ *
+ * One mbuf is allocated per read(). The loop ends early when no mbuf can be
+ * allocated or when read() returns no data. Returns the number of mbufs
+ * stored in bufs.
+ */
+static uint16_t
+pmd_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
+{
+	struct rx_queue *rxq = queue;
+	unsigned long bytes = 0;
+	uint16_t count = 0;
+
+	while (count < nb_pkts) {
+		struct rte_mbuf *m;
+		int nread;
+
+		m = rte_pktmbuf_alloc(rxq->mp);
+		if (unlikely(m == NULL)) {
+			RTE_LOG(WARNING, PMD, "Unable to allocate mbuf\n");
+			break;
+		}
+
+		/* nothing to read (or an error): give the mbuf back and stop */
+		nread = read(rxq->fd, rte_pktmbuf_mtod(m, char *),
+			     rte_pktmbuf_tailroom(m));
+		if (nread <= 0) {
+			rte_pktmbuf_free(m);
+			break;
+		}
+
+		m->port = rxq->in_port;
+		m->pkt_len = nread;
+		m->data_len = nread;
+
+		/* account for the receive frame */
+		bytes += m->pkt_len;
+		bufs[count] = m;
+		count++;
+	}
+
+	rxq->stats.ibytes += bytes;
+	rxq->stats.ipackets += count;
+
+	return count;
+}
+
+/* TX burst callback: write up to nb_pkts frames from bufs to the file
+ * descriptor of the given TX queue.
+ *
+ * Every attempt first polls the descriptor for POLLOUT with a zero timeout.
+ * Written mbufs are freed here; the remaining ones are left untouched and
+ * added to the queue error counter. Returns the number of frames written.
+ */
+static uint16_t
+pmd_tx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
+{
+	struct tx_queue *txq = queue;
+	struct pollfd pfd;
+	unsigned long bytes = 0;
+	uint16_t sent = 0;
+	int attempt;
+
+	if (unlikely(nb_pkts == 0))
+		return 0;
+
+	pfd.fd = txq->fd;
+	pfd.events = POLLOUT;
+
+	/* at most nb_pkts attempts; an attempt without POLLOUT sends nothing */
+	for (attempt = 0; attempt < nb_pkts; attempt++) {
+		struct rte_mbuf *m;
+		int rc;
+
+		rc = poll(&pfd, 1, 0);
+		if (rc <= 0)
+			break;
+
+		if (!(pfd.revents & POLLOUT))
+			continue;
+
+		/* copy the tx frame data */
+		m = bufs[sent];
+		rc = write(pfd.fd, rte_pktmbuf_mtod(m, void*),
+			   rte_pktmbuf_pkt_len(m));
+		if (rc <= 0)
+			break;
+
+		bytes += m->pkt_len;
+		sent++;
+		rte_pktmbuf_free(m);
+	}
+
+	txq->stats.obytes += bytes;
+	txq->stats.errs += nb_pkts - sent;
+	txq->stats.opackets += sent;
+
+	return sent;
+}
+
+/* Start callback: only marks the link of the port as up. Always returns 0. */
+static int
+tap_dev_start(struct rte_eth_dev *dev)
+{
+	struct rte_eth_link *link = &dev->data->dev_link;
+
+	/* Force the Link up */
+	link->link_status = ETH_LINK_UP;
+	return 0;
+}
+
+/* This function gets called when the current port gets stopped.
+ *
+ * Closes every descriptor recorded in internals->fds[] and marks the link
+ * down. Each closed slot is reset to -1, together with the queue copies of
+ * the same descriptor, so that a later stop, queue release or device removal
+ * does not close the same descriptor number again and a later queue setup
+ * opens a fresh descriptor instead of reusing a closed one.
+ */
+static void
+tap_dev_stop(struct rte_eth_dev *dev)
+{
+	int i;
+	struct pmd_internals *internals = dev->data->dev_private;
+
+	for (i = 0; i < internals->nb_queues; i++) {
+		int fd = internals->fds[i];
+
+		if (fd == -1)
+			continue;
+
+		close(fd);
+		internals->fds[i] = -1;
+
+		/* the queue setup code stores the same fd in both queues */
+		if (internals->rxq[i].fd == fd)
+			internals->rxq[i].fd = -1;
+		if (internals->txq[i].fd == fd)
+			internals->txq[i].fd = -1;
+	}
+
+	dev->data->dev_link.link_status = ETH_LINK_DOWN;
+}
+
+/* Configure callback: performs no work and always returns 0. */
+static int
+tap_dev_configure(struct rte_eth_dev *dev __rte_unused)
+{
+	return 0;
+}
+
+/* Device info callback: fill dev_info with the fixed limits of a tap port
+ * (one MAC address, nb_queues RX and TX queues, VLAN sized frames).
+ */
+static void
+tap_dev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
+{
+	const struct pmd_internals *pmd = dev->data->dev_private;
+	const uint16_t nb_queues = pmd->nb_queues;
+
+	dev_info->pci_dev = NULL;
+	dev_info->driver_name = drivername;
+	dev_info->if_index = pmd->if_index;
+
+	dev_info->max_rx_queues = nb_queues;
+	dev_info->max_tx_queues = nb_queues;
+	dev_info->max_mac_addrs = 1;
+
+	dev_info->min_rx_bufsize = 0;
+	dev_info->max_rx_pktlen = (uint32_t)ETHER_MAX_VLAN_FRAME_LEN;
+}
+
+/* Statistics callback: report the software counters of the port.
+ *
+ * The port totals are the sum over all nb_queues queues. The per-queue
+ * arrays are filled only for queue ids below RTE_ETHDEV_QUEUE_STAT_CNTRS;
+ * queues above that limit still contribute to the totals. 64 bit
+ * accumulators are used because the queue counters are 64 bit wide.
+ */
+static void
+tap_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *tap_stats)
+{
+	unsigned int i;
+	uint64_t rx_total = 0, tx_total = 0, tx_err_total = 0;
+	uint64_t rx_bytes_total = 0, tx_bytes_total = 0;
+	const struct pmd_internals *pmd = dev->data->dev_private;
+
+	for (i = 0; i < pmd->nb_queues; i++) {
+		const struct pkt_stats *rx = &pmd->rxq[i].stats;
+		const struct pkt_stats *tx = &pmd->txq[i].stats;
+
+		/* per-queue slots exist only for the first queues */
+		if (i < RTE_ETHDEV_QUEUE_STAT_CNTRS) {
+			tap_stats->q_ipackets[i] = rx->ipackets;
+			tap_stats->q_ibytes[i] = rx->ibytes;
+			tap_stats->q_opackets[i] = tx->opackets;
+			tap_stats->q_errors[i] = tx->errs;
+			tap_stats->q_obytes[i] = tx->obytes;
+		}
+
+		rx_total += rx->ipackets;
+		rx_bytes_total += rx->ibytes;
+		tx_total += tx->opackets;
+		tx_err_total += tx->errs;
+		tx_bytes_total += tx->obytes;
+	}
+
+	tap_stats->ipackets = rx_total;
+	tap_stats->ibytes = rx_bytes_total;
+	tap_stats->opackets = tx_total;
+	tap_stats->oerrors = tx_err_total;
+	tap_stats->obytes = tx_bytes_total;
+}
+
+/* Statistics reset callback: zero the RX packet/byte counters and the TX
+ * packet/error/byte counters of every queue of the port.
+ */
+static void
+tap_stats_reset(struct rte_eth_dev *dev)
+{
+	struct pmd_internals *pmd = dev->data->dev_private;
+	int qid;
+
+	for (qid = 0; qid < pmd->nb_queues; qid++) {
+		struct pkt_stats *rx = &pmd->rxq[qid].stats;
+		struct pkt_stats *tx = &pmd->txq[qid].stats;
+
+		rx->ipackets = 0;
+		rx->ibytes = 0;
+
+		tx->opackets = 0;
+		tx->errs = 0;
+		tx->obytes = 0;
+	}
+}
+
+/* Close callback: intentionally empty, no work is done here. */
+static void
+tap_dev_close(struct rte_eth_dev *dev __rte_unused)
+{
+}
+
+/* RX queue release callback: close the descriptor of the queue, if any, and
+ * mark the queue as having none. A NULL queue is ignored.
+ *
+ * Descriptor 0 is a valid descriptor and -1 marks an unused queue, so the
+ * test is fd >= 0, matching the fd < 0 test in tap_setup_queue().
+ *
+ * NOTE(review): the same descriptor number is also stored in the TX queue
+ * with the same id and in pmd_internals.fds[]; those copies cannot be
+ * reached from here and are left untouched.
+ */
+static void
+tap_rx_queue_release(void *queue)
+{
+	struct rx_queue *rxq = queue;
+
+	if (rxq && (rxq->fd >= 0)) {
+		close(rxq->fd);
+		rxq->fd = -1;
+	}
+}
+
+/* TX queue release callback: close the descriptor of the queue, if any, and
+ * mark the queue as having none. A NULL queue is ignored.
+ *
+ * Descriptor 0 is a valid descriptor and -1 marks an unused queue, so the
+ * test is fd >= 0, matching the fd < 0 test in tap_setup_queue().
+ *
+ * NOTE(review): the same descriptor number is also stored in the RX queue
+ * with the same id and in pmd_internals.fds[]; those copies cannot be
+ * reached from here and are left untouched.
+ */
+static void
+tap_tx_queue_release(void *queue)
+{
+	struct tx_queue *txq = queue;
+
+	if (txq && (txq->fd >= 0)) {
+		close(txq->fd);
+		txq->fd = -1;
+	}
+}
+
+/* Link update callback: performs no work and always returns 0; the link
+ * status is only changed by the start and stop callbacks.
+ */
+static int
+tap_link_update(struct rte_eth_dev *dev __rte_unused,
+		int wait_to_complete __rte_unused)
+{
+	return 0;
+}
+
+/* Attach a TUN/TAP descriptor to the RX/TX queue pair qid and publish both
+ * queues in dev->data.
+ *
+ * A descriptor already stored in the RX or the TX queue is reused; otherwise
+ * a new one is opened with tun_alloc() on the tap interface of this port.
+ * Both queues end up with the same descriptor.
+ *
+ * Returns the descriptor on success, -1 when tun_alloc() fails.
+ */
+static int
+tap_setup_queue(struct rte_eth_dev *dev,
+		struct pmd_internals *internals,
+		uint16_t qid)
+{
+	struct rx_queue *rx = &internals->rxq[qid];
+	struct tx_queue *tx = &internals->txq[qid];
+	int fd;
+
+	fd = (rx->fd >= 0) ? rx->fd : tx->fd;
+	if (fd < 0) {
+		/* Use the tap interface name kept in internals->name.
+		 * dev->data->name holds the ethdev name given at creation,
+		 * which would select a different host interface and could be
+		 * overwritten by tun_alloc().
+		 */
+		RTE_LOG(INFO, PMD, "Add queue to TAP %s for qid %d\n",
+			internals->name, qid);
+		fd = tun_alloc(internals->name);
+		if (fd < 0) {
+			RTE_LOG(ERR, PMD, "tun_alloc(%s) failed\n",
+				internals->name);
+			return -1;
+		}
+	}
+
+	dev->data->rx_queues[qid] = rx;
+	dev->data->tx_queues[qid] = tx;
+
+	rx->fd = fd;
+	tx->fd = fd;
+
+	return fd;
+}
+
+/* RX queue setup callback.
+ *
+ * Records the mempool and port id in the queue, verifies that the data room
+ * of the mempool (minus the headroom) holds ETH_FRAME_LEN bytes and attaches
+ * a descriptor through tap_setup_queue().
+ *
+ * Returns 0 on success, -1 for a bad queue id, a NULL mempool or a failed
+ * descriptor setup, -ENOMEM when the mbufs are too small.
+ */
+static int
+tap_rx_queue_setup(struct rte_eth_dev *dev,
+		   uint16_t rx_queue_id,
+		   uint16_t nb_rx_desc __rte_unused,
+		   unsigned int socket_id __rte_unused,
+		   const struct rte_eth_rxconf *rx_conf __rte_unused,
+		   struct rte_mempool *mp)
+{
+	struct pmd_internals *pmd = dev->data->dev_private;
+	struct rx_queue *rxq;
+	uint16_t room;
+	int fd;
+
+	if (!mp || (rx_queue_id >= pmd->nb_queues)) {
+		RTE_LOG(ERR, PMD, "nb_queues %d mp %p\n",
+			pmd->nb_queues, mp);
+		return -1;
+	}
+
+	rxq = &pmd->rxq[rx_queue_id];
+	rxq->mp = mp;
+	rxq->in_port = dev->data->port_id;
+
+	/* Now get the space available for data in the mbuf */
+	room = (uint16_t)(rte_pktmbuf_data_room_size(mp) -
+			  RTE_PKTMBUF_HEADROOM);
+	if (room < ETH_FRAME_LEN) {
+		RTE_LOG(ERR, PMD,
+			"%s: %d bytes will not fit in mbuf (%d bytes)\n",
+			dev->data->name, ETH_FRAME_LEN, room);
+		return -ENOMEM;
+	}
+
+	fd = tap_setup_queue(dev, pmd, rx_queue_id);
+	if (fd == -1)
+		return -1;
+
+	pmd->fds[rx_queue_id] = fd;
+	RTE_LOG(INFO, PMD, "RX TAP device name %s, qid %d on fd %d\n",
+		dev->data->name, rx_queue_id, rxq->fd);
+
+	return 0;
+}
+
+/* TX queue setup callback: attach a descriptor to the queue through
+ * tap_setup_queue().
+ *
+ * Returns 0 on success, -1 for a bad queue id or a failed descriptor setup.
+ */
+static int
+tap_tx_queue_setup(struct rte_eth_dev *dev,
+		   uint16_t tx_queue_id,
+		   uint16_t nb_tx_desc __rte_unused,
+		   unsigned int socket_id __rte_unused,
+		   const struct rte_eth_txconf *tx_conf __rte_unused)
+{
+	struct pmd_internals *pmd = dev->data->dev_private;
+
+	if (tx_queue_id >= pmd->nb_queues)
+		return -1;
+
+	if (tap_setup_queue(dev, pmd, tx_queue_id) == -1)
+		return -1;
+
+	RTE_LOG(INFO, PMD, "TX TAP device name %s, qid %d on fd %d\n",
+		dev->data->name, tx_queue_id, pmd->txq[tx_queue_id].fd);
+
+	return 0;
+}
+
+/* Ethdev callbacks implemented by the tap PMD; installed as dev->dev_ops
+ * when a port is created.
+ */
+static const struct eth_dev_ops ops = {
+	.dev_start              = tap_dev_start,
+	.dev_stop               = tap_dev_stop,
+	.dev_close              = tap_dev_close,
+	.dev_configure          = tap_dev_configure,
+	.dev_infos_get          = tap_dev_info,
+	.rx_queue_setup         = tap_rx_queue_setup,
+	.tx_queue_setup         = tap_tx_queue_setup,
+	.rx_queue_release       = tap_rx_queue_release,
+	.tx_queue_release       = tap_tx_queue_release,
+	.link_update            = tap_link_update,
+	.stats_get              = tap_stats_get,
+	.stats_reset            = tap_stats_reset,
+};
+
+/* Assign the MAC addresses of a tap port.
+ *
+ * The host side of the interface behind fd gets the address
+ * 'T' 'a' 'p' '-' <port id> <numa node>; the address stored in addr for the
+ * application side is 'd' 'n' 'e' 't' <port id> <numa node>.
+ *
+ * Returns 0 on success, -1 on a bad argument or a failed ioctl.
+ */
+static int
+pmd_mac_address(int fd, struct rte_eth_dev *dev, struct ether_addr *addr)
+{
+	static const char host_prefix[4] = { 'T', 'a', 'p', '-' };
+	static const char app_prefix[4] = { 'd', 'n', 'e', 't' };
+	struct ifreq ifr;
+	char *mac = ifr.ifr_hwaddr.sa_data;
+	unsigned int i;
+
+	if (!dev || !addr || (fd <= 0))
+		return -1;
+
+	memset(&ifr, 0, sizeof(ifr));
+
+	if (ioctl(fd, SIOCGIFHWADDR, &ifr) == -1) {
+		RTE_LOG(ERR, PMD, "ioctl failed (SIOCGIFHWADDR) (%s)\n",
+			ifr.ifr_name);
+		return -1;
+	}
+
+	/* Set the host based MAC address to this special MAC format */
+	for (i = 0; i < sizeof(host_prefix); i++)
+		mac[i] = host_prefix[i];
+	mac[4] = dev->data->port_id;
+	mac[5] = dev->data->numa_node;
+	if (ioctl(fd, SIOCSIFHWADDR, &ifr) == -1) {
+		RTE_LOG(ERR, PMD, "%s: ioctl failed (SIOCSIFHWADDR) (%s)\n",
+			dev->data->name, ifr.ifr_name);
+		return -1;
+	}
+
+	/* Set the local application MAC address, needs to be different then
+	 * the host based MAC address.
+	 */
+	for (i = 0; i < sizeof(app_prefix); i++)
+		mac[i] = app_prefix[i];
+	mac[4] = dev->data->port_id;
+	mac[5] = dev->data->numa_node;
+	rte_memcpy(addr, mac, ETH_ALEN);
+
+	return 0;
+}
+
+/* Create one tap ethdev port.
+ *
+ * name is the ethdev name, tap_name the name of the host tap interface
+ * (tun_alloc() may update it). Allocates the device data and the private
+ * data on the local NUMA node, opens the first tap descriptor for queue 0
+ * and programs the MAC addresses.
+ *
+ * Returns 0 on success, -EINVAL on any failure; on failure the allocations
+ * are freed, the port is released and an opened descriptor is closed.
+ */
+static int
+eth_dev_tap_create(const char *name, char *tap_name)
+{
+	int numa_node = rte_socket_id();
+	struct rte_eth_dev *dev = NULL;
+	struct pmd_internals *pmd = NULL;
+	struct rte_eth_dev_data *data = NULL;
+	int i, fd = -1;
+
+	RTE_LOG(INFO, PMD,
+		"%s: Create TAP Ethernet device with %d queues on numa %u\n",
+		 name, RTE_PMD_TAP_MAX_QUEUES, rte_socket_id());
+
+	data = rte_zmalloc_socket(tap_name, sizeof(*data), 0, numa_node);
+	if (!data)
+		goto error_exit;
+
+	pmd = rte_zmalloc_socket(tap_name, sizeof(*pmd), 0, numa_node);
+	if (!pmd)
+		goto error_exit;
+
+	/* Use the name and not the tap_name */
+	dev = rte_eth_dev_allocate(name);
+	if (!dev)
+		goto error_exit;
+
+	snprintf(pmd->name, sizeof(pmd->name), "%s", tap_name);
+
+	pmd->nb_queues = RTE_PMD_TAP_MAX_QUEUES;
+
+	/* Setup some default values */
+	data->dev_private = pmd;
+	data->port_id = dev->data->port_id;
+	data->dev_flags = RTE_ETH_DEV_DETACHABLE;
+	data->kdrv = RTE_KDRV_NONE;
+	data->drv_name = drivername;
+	data->numa_node = numa_node;
+
+	data->dev_link = pmd_link;
+	data->mac_addrs = &pmd->eth_addr;
+	data->nb_rx_queues = pmd->nb_queues;
+	data->nb_tx_queues = pmd->nb_queues;
+
+	dev->data = data;
+	dev->dev_ops = &ops;
+	dev->driver = NULL;
+	dev->rx_pkt_burst = pmd_rx_burst;
+	dev->tx_pkt_burst = pmd_tx_burst;
+	snprintf(dev->data->name, sizeof(dev->data->name), "%s", name);
+
+	/* Create the first Tap device */
+	fd = tun_alloc(tap_name);
+	if (fd < 0)
+		goto error_exit;
+
+	/* Presetup the fds to -1 as being not working */
+	for (i = 0; i < RTE_PMD_TAP_MAX_QUEUES; i++) {
+		pmd->fds[i] = -1;
+		pmd->rxq[i].fd = -1;
+		pmd->txq[i].fd = -1;
+	}
+
+	/* Take the TUN/TAP fd and place in the first location */
+	pmd->rxq[0].fd = fd;
+	pmd->txq[0].fd = fd;
+	pmd->fds[0] = fd;
+
+	if (pmd_mac_address(fd, dev, &pmd->eth_addr) < 0)
+		goto error_exit;
+
+	return 0;
+
+error_exit:
+	RTE_PMD_DEBUG_TRACE("Unable to initialize %s\n", name);
+
+	/* fd is only valid when tun_alloc() succeeded and a later step
+	 * failed; pmd is freed below, so nothing else would close it.
+	 */
+	if (fd >= 0)
+		close(fd);
+
+	rte_free(data);
+	rte_free(pmd);
+
+	rte_eth_dev_release_port(dev);
+
+	return -EINVAL;
+}
+
+/* kvargs handler for the "iface" key.
+ *
+ * extra_args is the interface name buffer. It receives value, or the
+ * default name built from DEFAULT_TAP_NAME and (tap_unit - 1) when value is
+ * NULL. Always returns 0.
+ */
+static int
+set_interface_name(const char *key __rte_unused,
+		   const char *value,
+		   void *extra_args)
+{
+	char *buf = extra_args;
+
+	if (!value) {
+		snprintf(buf, RTE_ETH_NAME_MAX_LEN - 1, "%s%d",
+			 DEFAULT_TAP_NAME, (tap_unit - 1));
+		return 0;
+	}
+
+	snprintf(buf, RTE_ETH_NAME_MAX_LEN - 1, "%s", value);
+	return 0;
+}
+
+/* kvargs handler for the "speed" key.
+ *
+ * extra_args points to the int that receives the speed. A NULL value selects
+ * ETH_SPEED_NUM_10G. The value must be a complete, non-negative decimal
+ * number that fits an int; atoi() would silently turn bad input into 0.
+ *
+ * Returns 0 on success, -1 when the value is not a valid number.
+ */
+static int
+set_interface_speed(const char *key __rte_unused,
+		    const char *value,
+		    void *extra_args)
+{
+	char *end = NULL;
+	long speed;
+
+	if (!value) {
+		*(int *)extra_args = ETH_SPEED_NUM_10G;
+		return 0;
+	}
+
+	speed = strtol(value, &end, 10);
+	/* reject empty input, trailing characters and out of range values */
+	if ((end == value) || (*end != '\0') ||
+	    (speed < 0) || (speed != (long)(int)speed)) {
+		RTE_LOG(ERR, PMD, "Invalid speed value (%s)\n", value);
+		return -1;
+	}
+
+	*(int *)extra_args = (int)speed;
+
+	return 0;
+}
+
+/* Open a TAP interface device.
+ *
+ * name is the vdev name, params the optional argument string with the keys
+ * "iface" and "speed". The interface name defaults to DEFAULT_TAP_NAME
+ * followed by the current unit number, the speed to ETH_SPEED_NUM_10G.
+ *
+ * Returns 0 on success or a negative value on failure. Every negative
+ * result is treated as a failure, because eth_dev_tap_create() reports
+ * -EINVAL rather than -1; the unit number taken above is then given back.
+ */
+static int
+rte_pmd_tap_probe(const char *name, const char *params)
+{
+	int ret;
+	struct rte_kvargs *kvlist = NULL;
+	int speed;
+	char tap_name[RTE_ETH_NAME_MAX_LEN];
+
+	speed = ETH_SPEED_NUM_10G;
+	snprintf(tap_name, sizeof(tap_name), "%s%d",
+		 DEFAULT_TAP_NAME, tap_unit++);
+
+	RTE_LOG(INFO, PMD, "Initializing pmd_tap for %s as %s\n",
+		name, tap_name);
+
+	if (params && (params[0] != '\0')) {
+		RTE_LOG(INFO, PMD, "paramaters (%s)\n", params);
+
+		kvlist = rte_kvargs_parse(params, valid_arguments);
+		if (kvlist) {
+			if (rte_kvargs_count(kvlist, ETH_TAP_SPEED_ARG) == 1) {
+				ret = rte_kvargs_process(kvlist,
+							 ETH_TAP_SPEED_ARG,
+							 &set_interface_speed,
+							 &speed);
+				if (ret < 0)
+					goto leave;
+			}
+
+			if (rte_kvargs_count(kvlist, ETH_TAP_IFACE_ARG) == 1) {
+				ret = rte_kvargs_process(kvlist,
+							 ETH_TAP_IFACE_ARG,
+							 &set_interface_name,
+							 tap_name);
+				if (ret < 0)
+					goto leave;
+			}
+		}
+	}
+	/* the link template is copied into the port by the create routine */
+	pmd_link.link_speed = speed;
+
+	ret = eth_dev_tap_create(name, tap_name);
+
+leave:
+	if (ret < 0) {
+		RTE_LOG(INFO, PMD, "Failed to create pmd_tap for %s as %s\n",
+			name, tap_name);
+		tap_unit--;		/* Restore the unit number */
+	}
+	rte_kvargs_free(kvlist);
+
+	return ret;
+}
+
+/* detach a TAP device.
+ *
+ * Looks up the ethdev called name, closes every descriptor recorded in
+ * fds[], frees the private and the device data and releases the port.
+ * Returns 0, also when no such ethdev exists.
+ */
+static int
+rte_pmd_tap_remove(const char *name)
+{
+	struct rte_eth_dev *eth_dev;
+	struct pmd_internals *pmd;
+	int qid;
+
+	RTE_LOG(INFO, PMD, "Closing TUN/TAP Ethernet device on numa %u\n",
+		rte_socket_id());
+
+	/* find the ethdev entry */
+	eth_dev = rte_eth_dev_allocated(name);
+	if (eth_dev == NULL)
+		return 0;
+
+	pmd = eth_dev->data->dev_private;
+	for (qid = 0; qid < pmd->nb_queues; qid++) {
+		if (pmd->fds[qid] == -1)
+			continue;
+		close(pmd->fds[qid]);
+	}
+
+	rte_free(pmd);
+	rte_free(eth_dev->data);
+
+	rte_eth_dev_release_port(eth_dev);
+
+	return 0;
+}
+
+/* Virtual device driver hooks: probe creates a tap port, remove detaches it */
+static struct rte_vdev_driver pmd_tap_drv = {
+	.probe = rte_pmd_tap_probe,
+	.remove = rte_pmd_tap_remove,
+};
+
+/* Register the driver as net_tap and publish the accepted arguments */
+DRIVER_REGISTER_VDEV(net_tap, pmd_tap_drv);
+DRIVER_REGISTER_PARAM_STRING(net_tap, "iface=<string>,speed=N");
diff --git a/drivers/net/tap/rte_pmd_tap_version.map b/drivers/net/tap/rte_pmd_tap_version.map
new file mode 100644
index 0000000..61463bf
--- /dev/null
+++ b/drivers/net/tap/rte_pmd_tap_version.map
@@ -0,0 +1,4 @@
+DPDK_16.11 {
+
+	local: *;
+};
diff --git a/mk/rte.app.mk b/mk/rte.app.mk
index ac50a21..40d16f7 100644
--- a/mk/rte.app.mk
+++ b/mk/rte.app.mk
@@ -123,6 +123,7 @@ _LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_PCAP)       += -lrte_pmd_pcap -lpcap
 _LDLIBS-$(CONFIG_RTE_LIBRTE_QEDE_PMD)       += -lrte_pmd_qede -lz
 _LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_RING)       += -lrte_pmd_ring
 _LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_SZEDATA2)   += -lrte_pmd_szedata2 -lsze2
+_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_TAP)        += -lrte_pmd_tap
 _LDLIBS-$(CONFIG_RTE_LIBRTE_THUNDERX_NICVF_PMD) += -lrte_pmd_thunderx_nicvf -lm
 _LDLIBS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD)     += -lrte_pmd_virtio
 ifeq ($(CONFIG_RTE_LIBRTE_VHOST),y)
-- 
2.8.0.GIT

^ permalink raw reply related	[flat|nested] 59+ messages in thread

* Re: [PATCH v8] drivers/net:new PMD using tun/tap host interface
  2016-10-13 16:11     ` [PATCH v8] " Keith Wiles
@ 2016-10-13 16:33       ` Mcnamara, John
  0 siblings, 0 replies; 59+ messages in thread
From: Mcnamara, John @ 2016-10-13 16:33 UTC (permalink / raw)
  To: Wiles, Keith, dev; +Cc: pmatilai, yuanhan.liu, Yigit, Ferruh

Hi,

Some doc comments below. Apologies, for the late review, I didn't see
the docs inline.

 

> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Keith Wiles
> Sent: Thursday, October 13, 2016 5:11 PM
> To: dev@dpdk.org
> Cc: pmatilai@redhat.com; yuanhan.liu@linux.intel.com; Yigit, Ferruh
> <ferruh.yigit@intel.com>
> Subject: [dpdk-dev] [PATCH v8] drivers/net:new PMD using tun/tap host
> interface
> 
> The rte_eth_tap.c PMD creates a device using TUN/TAP interfaces on the
> local host. The PMD allows for DPDK and the host to communicate using a
> raw device interface on the host and in the DPDK application. The device
> created is a Tap device with a L2 packet header.
> 
> v8 - Fix issue with tap_tx_queue_setup() not return zero on success.
> v7 - Reword the comment in common_base and fix the data->name issue
> v6 - fixed the checkpatch issues
> v5 - merge in changes from list review see related emails
>      fixed many minor edits
> v4 - merge with latest driver changes
> v3 - fix includes by removing ifdef for other type besides Linux
>      Fix the copyright notice in the Makefile
> v2 - merge all of the patches into one patch
>      Fix a typo on naming the tap device
>      Update the maintainers list
> 
> Signed-off-by: Keith Wiles <keith.wiles@intel.com>
> ---
>  MAINTAINERS                             |   5 +
>  config/common_base                      |   9 +
>  config/common_linuxapp                  |   1 +
>  doc/guides/nics/tap.rst                 | 138 ++++++
>  drivers/net/Makefile                    |   1 +
>  drivers/net/tap/Makefile                |  57 +++
>  drivers/net/tap/rte_eth_tap.c           | 756


You need to add "tap" to the doc/guides/nics/index.rst file to
include the tap.rst in the docs.


> +
> +Tun/Tap Poll Mode Driver
> +========================================
> +
> +The rte_eth_tap.c PMD creates a device using TUN/TAP interfaces on the
> +local host. The PMD allows for DPDK and the host to communicate using a
> +raw device interface on the host and in the DPDK application.
> +
> +The device created is a TAP device, which sends/receives packet in a
> +raw format with a L2 header. The usage for a TAP PMD is for
> +connectivity to the local host using a TAP interface. When the TAP PMD
> +is initialized it will create a number of tap devices in the host
> +accessed via 'ifconfig -a' or 'ip' command. The commands can be used to
> assign and query the virtual like device.

The apostrophes around the commands should be replaced with `` to render
them as fixed width.


> +
> +These TAP interfaces can be used with wireshark or tcpdump or
> +Pktgen-DPDK along with being able to be used as a network connection to
> +the DPDK application. The method enable one or more interfaces is to
> +use the --vdev=net_tap option on the DPDK application  command line.
> +Each --vdev=net_tap option give will create an interface named dtap0,
> dtap1, ... and so forth.

Same here, include any commands or variable names in backticks to render
them as fixed width in the text: ``--vdev=net_tap``


> +
> +.. code-block:: console
> +
> +   The interfaced name can be changed by adding the iface=foo0
> +   e.g. --vdev=net_tap,iface=foo0 --vdev=net_tap,iface=foo1, ...

This would be better formatted as follows:

The interface name can be changed by adding the ``iface=foo0`` argument, for example::

   --vdev=net_tap,iface=foo0 --vdev=net_tap,iface=foo1, ...



> +If you have a Network Stack in your DPDK application or something like
> +it you can utilize that stack to handle the network protocols. Plus you
> +would be able to address the interface using an IP address assigned to
> the internal interface.
> +
> +A very crude test you can do the following:
> +
> +Apply the patch below and make sure you have socat installed on your
> system.

There is no patch below in the docs. ;-) Also, this would probably be better
as a new section. Something like:

 
Example
-------

The following is a simple example of using the TUN/TAP PMD with the Pktgen
packet generator. It requires that the ``socat`` utility is installed on the
test system.

Build DPDK, then pull down Pktgen and build pktgen using the DPDK SDK/Target
used to build the dpdk you pulled down.

...

> +
> +Build DPDK, then pull down Pktgen and build pktgen using the DPDK
> +SDK/Target used to build the dpdk you pulled down.
> +
> +Run pktgen from the pktgen repo directory in an xterm:
> +    Note: change the -b options to blacklist all of your physical ports.
> The
> +          following command line is all one line.


The RST syntax for Note and the indentation are wrong here. Also the note
would be better after the example. Something like:


.. Note::

   Change the ``-b`` options to blacklist all of your physical ports. The
   following command line is all one line.

   Also, ``-f themes/black-yellow.theme`` is optional if the default colors
   work on your system configuration. See the Pktgen docs for more
   information.


Finally, if you want to include the TAP PMD in the NIC overview table:

    http://dpdk.org/doc/guides/nics/overview.html

You need to include a feature .ini file in the following dir:

    http://dpdk.org/browse/dpdk/tree/doc/guides/nics/features

Use the default as an example:

    http://dpdk.org/browse/dpdk/tree/doc/guides/nics/features/default.ini


Apart from those small changes it is a nice introduction and example code.

John

^ permalink raw reply	[flat|nested] 59+ messages in thread

* [PATCH v9] drivers/net:new PMD using tun/tap host interface
  2016-09-21  2:00   ` [PATCH v3] drivers/net:new PMD using tun/tap host interface Keith Wiles
                       ` (3 preceding siblings ...)
  2016-10-13 16:11     ` [PATCH v8] " Keith Wiles
@ 2016-10-13 22:03     ` Keith Wiles
  2016-10-14  6:41       ` Mcnamara, John
                         ` (6 more replies)
  4 siblings, 7 replies; 59+ messages in thread
From: Keith Wiles @ 2016-10-13 22:03 UTC (permalink / raw)
  To: dev; +Cc: pmatilai, yuanhan.liu, ferruh.yigit

The rte_eth_tap.c PMD creates a device using TUN/TAP interfaces
on the local host. The PMD allows for DPDK and the host to
communicate using a raw device interface on the host and in
the DPDK application. The device created is a Tap device with
a L2 packet header.

v9 - Fix up the docs to use correct syntax
v8 - Fix issue with tap_tx_queue_setup() not return zero on success.
v7 - Reword the comment in common_base and fix the data->name issue
v6 - fixed the checkpatch issues
v5 - merge in changes from list review see related emails
     fixed many minor edits
v4 - merge with latest driver changes
v3 - fix includes by removing ifdef for other type besides Linux
     Fix the copyright notice in the Makefile
v2 - merge all of the patches into one patch
     Fix a typo on naming the tap device
     Update the maintainers list

Signed-off-by: Keith Wiles <keith.wiles@intel.com>
---
 MAINTAINERS                             |   5 +
 config/common_base                      |   9 +
 config/common_linuxapp                  |   1 +
 doc/guides/nics/index.rst               |   1 +
 doc/guides/nics/tap.rst                 | 136 ++++++
 drivers/net/Makefile                    |   1 +
 drivers/net/tap/Makefile                |  57 +++
 drivers/net/tap/rte_eth_tap.c           | 756 ++++++++++++++++++++++++++++++++
 drivers/net/tap/rte_pmd_tap_version.map |   4 +
 mk/rte.app.mk                           |   1 +
 10 files changed, 971 insertions(+)
 create mode 100644 doc/guides/nics/tap.rst
 create mode 100644 drivers/net/tap/Makefile
 create mode 100644 drivers/net/tap/rte_eth_tap.c
 create mode 100644 drivers/net/tap/rte_pmd_tap_version.map

diff --git a/MAINTAINERS b/MAINTAINERS
index 8f5fa82..433d402 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -394,6 +394,11 @@ F: doc/guides/nics/pcap_ring.rst
 F: app/test/test_pmd_ring.c
 F: app/test/test_pmd_ring_perf.c
 
+Tap PMD
+M: Keith Wiles <keith.wiles@intel.com>
+F: drivers/net/tap
+F: doc/guides/nics/tap.rst
+
 Null Networking PMD
 M: Tetsuya Mukawa <mtetsuyah@gmail.com>
 F: drivers/net/null/
diff --git a/config/common_base b/config/common_base
index f5d2eff..47ef843 100644
--- a/config/common_base
+++ b/config/common_base
@@ -592,3 +592,12 @@ CONFIG_RTE_APP_TEST_RESOURCE_TAR=n
 CONFIG_RTE_TEST_PMD=y
 CONFIG_RTE_TEST_PMD_RECORD_CORE_CYCLES=n
 CONFIG_RTE_TEST_PMD_RECORD_BURST_STATS=n
+
+#
+# Compile the TAP PMD
+#
+# The TAP PMD is currently only built for Linux and the
+# option is enabled by default in common_linuxapp file,
+# set to 'n' in the common_base file.
+#
+CONFIG_RTE_LIBRTE_PMD_TAP=n
diff --git a/config/common_linuxapp b/config/common_linuxapp
index 2483dfa..782b503 100644
--- a/config/common_linuxapp
+++ b/config/common_linuxapp
@@ -44,3 +44,4 @@ CONFIG_RTE_LIBRTE_PMD_VHOST=y
 CONFIG_RTE_LIBRTE_PMD_AF_PACKET=y
 CONFIG_RTE_LIBRTE_POWER=y
 CONFIG_RTE_VIRTIO_USER=y
+CONFIG_RTE_LIBRTE_PMD_TAP=y
diff --git a/doc/guides/nics/index.rst b/doc/guides/nics/index.rst
index 92d56a5..f676a52 100644
--- a/doc/guides/nics/index.rst
+++ b/doc/guides/nics/index.rst
@@ -56,6 +56,7 @@ Network Interface Controller Drivers
     vhost
     vmxnet3
     pcap_ring
+    tap
 
 **Figures**
 
diff --git a/doc/guides/nics/tap.rst b/doc/guides/nics/tap.rst
new file mode 100644
index 0000000..622b9e7
--- /dev/null
+++ b/doc/guides/nics/tap.rst
@@ -0,0 +1,136 @@
+..  BSD LICENSE
+    Copyright(c) 2016 Intel Corporation. All rights reserved.
+    All rights reserved.
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions
+    are met:
+
+    * Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+    notice, this list of conditions and the following disclaimer in
+    the documentation and/or other materials provided with the
+    distribution.
+    * Neither the name of Intel Corporation nor the names of its
+    contributors may be used to endorse or promote products derived
+    from this software without specific prior written permission.
+
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+Tun/Tap Poll Mode Driver
+========================
+
+The ``rte_eth_tap.c`` PMD creates a device using TUN/TAP interfaces on the
+local host. The PMD allows for DPDK and the host to communicate using a raw
+device interface on the host and in the DPDK application.
+
+The device created is a TAP device, which sends/receives packet in a raw
+format with a L2 header. The usage for a TAP PMD is for connectivity to the
+local host using a TAP interface. When the TAP PMD is initialized it will
+create a number of tap devices in the host accessed via ``ifconfig -a`` or
+``ip`` command. These commands can be used to configure and query the virtual
+device.
+
+These TAP interfaces can be used with Wireshark or tcpdump or Pktgen-DPDK
+along with being able to be used as a network connection to the DPDK
+application. The method to enable one or more interfaces is to use the
+``--vdev=net_tap`` option on the DPDK application command line. Each
+``--vdev=net_tap`` option given will create an interface named dtap0, dtap1,
+and so on.
+
+The interface name can be changed by adding the ``iface=foo0`` argument, for example::
+
+   --vdev=net_tap,iface=foo0 --vdev=net_tap,iface=foo1, ...
+
+Also the speed of the interface can be changed from 10G to whatever number
+needed, but the interface does not enforce that speed, for example::
+
+   --vdev=net_tap,iface=foo0,speed=25000
+
+After the DPDK application is started you can send and receive packets on the
+interface using the standard rx_burst/tx_burst APIs in DPDK. From the host
+point of view you can use any host tool like tcpdump, Wireshark, ping, Pktgen
+and others to communicate with the DPDK application. The DPDK application may
+not understand network protocols like IPv4/6, UDP or TCP unless the
+application has been written to understand these protocols.
+
+If you need the interface to act as a real network interface, meaning it is
+running and has a valid IP address, you can do this with the following
+commands::
+
+   sudo ip link set dtap0 up; sudo ip addr add 192.168.0.250/24 dev dtap0
+   sudo ip link set dtap1 up; sudo ip addr add 192.168.1.250/24 dev dtap1
+
+Please change the IP addresses as you see fit.
+
+If routing is enabled on the host you can also communicate with the DPDK App
+over the internet via a standard socket layer application as long as you
+account for the protocol handling in the application.
+
+If you have a Network Stack in your DPDK application or something like it you
+can utilize that stack to handle the network protocols. Plus you would be able
+to address the interface using an IP address assigned to the internal
+interface.
+
+Example
+-------
+
+The following is a simple example of using the TUN/TAP PMD with the Pktgen
+packet generator. It requires that the ``socat`` utility is installed on the
+test system.
+
+Build DPDK, then pull down Pktgen and build pktgen using the DPDK SDK/Target
+used to build the dpdk you pulled down.
+
+Run pktgen from the pktgen directory in a terminal with a commandline like the
+following::
+
+    sudo ./app/app/x86_64-native-linuxapp-gcc/app/pktgen -l 1-5 -n 4        \
+     --proc-type auto --log-level 8 --socket-mem 512,512 --file-prefix pg   \
+     --vdev=net_tap --vdev=net_tap -b 05:00.0 -b 05:00.1                    \
+     -b 04:00.0 -b 04:00.1 -b 04:00.2 -b 04:00.3                            \
+     -b 81:00.0 -b 81:00.1 -b 81:00.2 -b 81:00.3                            \
+     -b 82:00.0 -b 83:00.0 -- -T -P -m [2:3].0 -m [4:5].1                   \
+     -f themes/black-yellow.theme
+
+.. Note::
+
+   Change the ``-b`` options to blacklist all of your physical ports. The
+   command line above is all one line.
+
+   Also, ``-f themes/black-yellow.theme`` is optional if the default colors
+   work on your system configuration. See the Pktgen docs for more
+   information.
+
+Verify with the ``ifconfig -a`` command in a different xterm window; the
+``dtap0`` and ``dtap1`` interfaces should have been created.
+
+Next set the links for the two interfaces to up via the commands below::
+
+    sudo ip link set dtap0 up; sudo ip addr add 192.168.0.250/24 dev dtap0
+    sudo ip link set dtap1 up; sudo ip addr add 192.168.1.250/24 dev dtap1
+
+Then use socat to create a loopback for the two interfaces::
+
+    sudo socat interface:dtap0 interface:dtap1
+
+Then on the Pktgen command line interface you can start sending packets using
+the commands ``start 0`` and ``start 1`` or you can start both at the same
+time with ``start all``. The command ``str`` is an alias for ``start all`` and
+``stp`` is an alias for ``stop all``.
+
+While running you should see the 64 byte counters increasing to verify the
+traffic is being looped back. You can use ``set all size XXX`` to change the
+size of the packets after you stop the traffic. Use the pktgen ``help``
+command to see a list of all commands. You can also use the ``-f`` option to
+load commands at startup.
diff --git a/drivers/net/Makefile b/drivers/net/Makefile
index bc93230..e366a85 100644
--- a/drivers/net/Makefile
+++ b/drivers/net/Makefile
@@ -51,6 +51,7 @@ DIRS-$(CONFIG_RTE_LIBRTE_PMD_PCAP) += pcap
 DIRS-$(CONFIG_RTE_LIBRTE_QEDE_PMD) += qede
 DIRS-$(CONFIG_RTE_LIBRTE_PMD_RING) += ring
 DIRS-$(CONFIG_RTE_LIBRTE_PMD_SZEDATA2) += szedata2
+DIRS-$(CONFIG_RTE_LIBRTE_PMD_TAP) += tap
 DIRS-$(CONFIG_RTE_LIBRTE_THUNDERX_NICVF_PMD) += thunderx
 DIRS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio
 DIRS-$(CONFIG_RTE_LIBRTE_VMXNET3_PMD) += vmxnet3
diff --git a/drivers/net/tap/Makefile b/drivers/net/tap/Makefile
new file mode 100644
index 0000000..e18f30c
--- /dev/null
+++ b/drivers/net/tap/Makefile
@@ -0,0 +1,57 @@
+#   BSD LICENSE
+#
+#   Copyright(c) 2016 Intel Corporation. All rights reserved.
+#
+#   Redistribution and use in source and binary forms, with or without
+#   modification, are permitted provided that the following conditions
+#   are met:
+#
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice, this list of conditions and the following disclaimer in
+#       the documentation and/or other materials provided with the
+#       distribution.
+#     * Neither the name of Intel Corporation nor the names of its
+#       contributors may be used to endorse or promote products derived
+#       from this software without specific prior written permission.
+#
+#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+#   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+#   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+#   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+#   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+#
+# library name
+#
+LIB = librte_pmd_tap.a
+
+EXPORT_MAP := rte_pmd_tap_version.map
+
+LIBABIVER := 1
+
+CFLAGS += -O3
+CFLAGS += $(WERROR_FLAGS)
+
+#
+# all source are stored in SRCS-y
+#
+SRCS-$(CONFIG_RTE_LIBRTE_PMD_TAP) += rte_eth_tap.c
+
+# this lib depends upon:
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_TAP) += lib/librte_eal
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_TAP) += lib/librte_mbuf
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_TAP) += lib/librte_mempool
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_TAP) += lib/librte_ether
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_TAP) += lib/librte_kvargs
+
+include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/drivers/net/tap/rte_eth_tap.c b/drivers/net/tap/rte_eth_tap.c
new file mode 100644
index 0000000..7f303db
--- /dev/null
+++ b/drivers/net/tap/rte_eth_tap.c
@@ -0,0 +1,756 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2016 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <rte_mbuf.h>
+#include <rte_ethdev.h>
+#include <rte_malloc.h>
+#include <rte_vdev.h>
+#include <rte_kvargs.h>
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/socket.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <unistd.h>
+#include <poll.h>
+#include <arpa/inet.h>
+#include <linux/if.h>
+#include <linux/if_tun.h>
+#include <linux/if_ether.h>
+#include <fcntl.h>
+
+/* Linux based path to the TUN device */
+#define TUN_TAP_DEV_PATH        "/dev/net/tun"
+#define DEFAULT_TAP_NAME        "dtap"	/* default interface name prefix */
+
+#define ETH_TAP_IFACE_ARG       "iface"	/* devargs key: interface name */
+#define ETH_TAP_SPEED_ARG       "speed"	/* devargs key: reported link speed */
+
+#define RTE_PMD_TAP_MAX_QUEUES	32	/* queues (and fds) per device */
+
+static const char *valid_arguments[] = {	/* keys given to rte_kvargs_parse() */
+	ETH_TAP_IFACE_ARG,
+	ETH_TAP_SPEED_ARG,
+	NULL
+};
+
+static const char *drivername = "Tap PMD";
+static int tap_unit;	/* unit number appended to DEFAULT_TAP_NAME by probe */
+
+static struct rte_eth_link pmd_link = {	/* copied into each new port's dev_link */
+	.link_speed = ETH_SPEED_NUM_10G,
+	.link_duplex = ETH_LINK_FULL_DUPLEX,
+	.link_status = ETH_LINK_DOWN,
+	.link_autoneg = ETH_LINK_SPEED_AUTONEG
+};
+
+struct pkt_stats {
+	uint64_t opackets;		/* Number of output packets */
+	uint64_t ipackets;		/* Number of input packets */
+	uint64_t obytes;		/* Number of bytes on output */
+	uint64_t ibytes;		/* Number of bytes on input */
+	uint64_t errs;			/* TX: packets of a burst left unsent */
+};
+
+struct rx_queue {
+	struct rte_mempool *mp;		/* Mempool for RX packets */
+	uint16_t in_port;		/* Port ID stored in each RX mbuf */
+	int fd;				/* TAP descriptor read by rx burst, -1 if unset */
+
+	struct pkt_stats stats;		/* Stats for this RX queue */
+};
+
+struct tx_queue {
+	int fd;				/* TAP descriptor written by tx burst, -1 if unset */
+	struct pkt_stats stats;		/* Stats for this TX queue */
+};
+
+struct pmd_internals {
+	char name[RTE_ETH_NAME_MAX_LEN];	/* Internal Tap device name */
+	uint16_t nb_queues;		/* Number of queues supported */
+	struct ether_addr eth_addr;	/* Mac address of the device port */
+
+	int if_index;			/* IF_INDEX reported by tap_dev_info() */
+	int fds[RTE_PMD_TAP_MAX_QUEUES]; /* Per-queue descriptors, -1 if unset */
+
+	struct rx_queue rxq[RTE_PMD_TAP_MAX_QUEUES];	/* List of RX queues */
+	struct tx_queue txq[RTE_PMD_TAP_MAX_QUEUES];	/* List of TX queues */
+};
+
+/* Tun/Tap allocation routine
+ *
+ * Opens TUN_TAP_DEV_PATH and binds the descriptor to a TAP interface
+ * (IFF_TAP | IFF_NO_PI) in non-blocking mode.
+ *
+ * name is the interface name to request; when it is NULL or empty the host
+ * supplies the name. A non-NULL name must point to a buffer of at least
+ * RTE_ETH_NAME_MAX_LEN bytes, because the name reported back by the host is
+ * copied into it when it differs from the requested one.
+ *
+ * Returns the file descriptor on success or -1 on failure.
+ */
+static int
+tun_alloc(char *name)
+{
+	struct ifreq ifr;
+	unsigned int features;
+	int fd;
+
+	memset(&ifr, 0, sizeof(struct ifreq));
+
+	ifr.ifr_flags = IFF_TAP | IFF_NO_PI;
+	/* snprintf() always terminates ifr_name, even for an over-long name */
+	if (name && name[0])
+		snprintf(ifr.ifr_name, IFNAMSIZ, "%s", name);
+
+	fd = open(TUN_TAP_DEV_PATH, O_RDWR);
+	if (fd < 0) {
+		RTE_LOG(ERR, PMD, "Unable to create TAP interface\n");
+		goto error;
+	}
+
+	/* Grab the TUN features to verify we can work */
+	if (ioctl(fd, TUNGETFEATURES, &features) < 0) {
+		RTE_LOG(ERR, PMD, "Unable to get TUN/TAP features\n");
+		goto error;
+	}
+	RTE_LOG(DEBUG, PMD, "TUN/TAP Features %08x\n", features);
+
+	if (!(features & IFF_MULTI_QUEUE) && (RTE_PMD_TAP_MAX_QUEUES > 1)) {
+		RTE_LOG(DEBUG, PMD, "TUN/TAP device only one queue\n");
+		goto error;
+	} else if ((features & IFF_ONE_QUEUE) &&
+			(RTE_PMD_TAP_MAX_QUEUES == 1)) {
+		ifr.ifr_flags |= IFF_ONE_QUEUE;
+		RTE_LOG(DEBUG, PMD, "Single queue only support\n");
+	} else {
+		ifr.ifr_flags |= IFF_MULTI_QUEUE;
+		RTE_LOG(DEBUG, PMD, "Multi-queue support for %d queues\n",
+			RTE_PMD_TAP_MAX_QUEUES);
+	}
+
+	/* Set the TUN/TAP configuration and get the name if needed */
+	if (ioctl(fd, TUNSETIFF, (void *)&ifr) < 0) {
+		RTE_LOG(ERR, PMD, "Unable to set TUNSETIFF for %s\n",
+			ifr.ifr_name);
+		perror("TUNSETIFF");
+		goto error;
+	}
+
+	/* Always set the file descriptor to non-blocking */
+	if (fcntl(fd, F_SETFL, O_NONBLOCK) < 0) {
+		RTE_LOG(ERR, PMD, "Unable to set to nonblocking\n");
+		perror("F_SETFL, NONBLOCK");
+		goto error;
+	}
+
+	/* If the host reports a different name, hand it back to the caller */
+	if (name && strcmp(name, ifr.ifr_name))
+		snprintf(name, RTE_ETH_NAME_MAX_LEN - 1, "%s", ifr.ifr_name);
+
+	return fd;
+
+error:
+	/* 0 is a valid descriptor; only a failed open() leaves nothing to close */
+	if (fd >= 0)
+		close(fd);
+	return -1;
+}
+
+/* RX burst callback: fill bufs with up to nb_pkts frames read from the
+ * queue's file descriptor, using one read() per freshly allocated mbuf.
+ * The burst ends early when no mbuf can be allocated or read() returns a
+ * value <= 0. Returns the number of mbufs stored in bufs.
+ */
+static uint16_t
+pmd_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
+{
+	struct rx_queue *rxq = queue;
+	unsigned long rx_bytes = 0;
+	uint16_t count = 0;
+
+	while (count < nb_pkts) {
+		struct rte_mbuf *pkt;
+		int nread;
+
+		/* every frame gets its own mbuf from the queue's pool */
+		pkt = rte_pktmbuf_alloc(rxq->mp);
+		if (unlikely(!pkt)) {
+			RTE_LOG(WARNING, PMD, "Unable to allocate mbuf\n");
+			break;
+		}
+
+		nread = read(rxq->fd, rte_pktmbuf_mtod(pkt, char *),
+			     rte_pktmbuf_tailroom(pkt));
+		if (nread <= 0) {
+			/* nothing was read: return the unused mbuf */
+			rte_pktmbuf_free(pkt);
+			break;
+		}
+
+		pkt->port = rxq->in_port;
+		pkt->pkt_len = nread;
+		pkt->data_len = nread;
+
+		/* hand the frame to the caller and account for it */
+		rx_bytes += pkt->pkt_len;
+		bufs[count] = pkt;
+		count++;
+	}
+
+	rxq->stats.ibytes += rx_bytes;
+	rxq->stats.ipackets += count;
+
+	return count;
+}
+
+/* TX burst callback: write up to nb_pkts mbufs to the queue's file
+ * descriptor. Before each write the descriptor is polled with a zero
+ * timeout; the burst stops when poll() or write() returns a value <= 0.
+ * Each write passes the first segment's data pointer with the packet
+ * length. Written mbufs are freed; the rest stay with the caller.
+ * Returns the number of packets written.
+ */
+static uint16_t
+pmd_tx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
+{
+	struct tx_queue *txq = queue;
+	struct rte_mbuf *pkt;
+	struct pollfd pfd;
+	unsigned long tx_bytes = 0;
+	uint16_t sent = 0;
+	int attempt, rc;
+
+	if (unlikely(nb_pkts == 0))
+		return 0;
+
+	pfd.fd = txq->fd;
+	pfd.events = POLLOUT;
+
+	for (attempt = 0; attempt < nb_pkts; attempt++) {
+		rc = poll(&pfd, 1, 0);
+		if (rc <= 0)
+			break;
+
+		/* descriptor reported something other than writable */
+		if (!(pfd.revents & POLLOUT))
+			continue;
+
+		/* copy the tx frame data */
+		pkt = bufs[sent];
+		rc = write(pfd.fd, rte_pktmbuf_mtod(pkt, void*),
+			   rte_pktmbuf_pkt_len(pkt));
+		if (rc <= 0)
+			break;
+
+		tx_bytes += pkt->pkt_len;
+		sent++;
+		rte_pktmbuf_free(pkt);
+	}
+
+	txq->stats.obytes += tx_bytes;
+	txq->stats.opackets += sent;
+	/* everything the caller offered but we did not write counts as error */
+	txq->stats.errs += nb_pkts - sent;
+
+	return sent;
+}
+
+static int
+tap_dev_start(struct rte_eth_dev *dev)
+{
+	/* Force the Link up: only the link status kept in dev->data is
+	 * changed here, no descriptor is opened or touched. Always returns 0.
+	 */
+	dev->data->dev_link.link_status = ETH_LINK_UP;
+
+	return 0;
+}
+
+/* This function gets called when the current port gets stopped.
+ *
+ * Closes every descriptor recorded in internals->fds[] and marks the link
+ * down. Each closed slot, together with the RX/TX queue entries that share
+ * the same descriptor, is reset to -1 so that a later queue release or
+ * device removal does not close the same descriptor number a second time.
+ */
+static void
+tap_dev_stop(struct rte_eth_dev *dev)
+{
+	int i;
+	struct pmd_internals *internals = dev->data->dev_private;
+
+	for (i = 0; i < internals->nb_queues; i++) {
+		if (internals->fds[i] == -1)
+			continue;
+
+		close(internals->fds[i]);
+
+		/* forget the stale descriptor everywhere it was stored */
+		internals->fds[i] = -1;
+		internals->rxq[i].fd = -1;
+		internals->txq[i].fd = -1;
+	}
+
+	dev->data->dev_link.link_status = ETH_LINK_DOWN;
+}
+
+static int
+tap_dev_configure(struct rte_eth_dev *dev __rte_unused)
+{
+	return 0;	/* no configuration step: always reports success */
+}
+
+/* Fill dev_info with the static capabilities of the port: one MAC address,
+ * VLAN-sized maximum frame, and as many RX/TX queues as the PMD supports.
+ */
+static void
+tap_dev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
+{
+	const struct pmd_internals *pmd = dev->data->dev_private;
+
+	/* identification */
+	dev_info->driver_name = drivername;
+	dev_info->pci_dev = NULL;
+	dev_info->if_index = pmd->if_index;
+
+	/* queue limits come from the per-device queue count */
+	dev_info->max_rx_queues = pmd->nb_queues;
+	dev_info->max_tx_queues = pmd->nb_queues;
+
+	/* frame and address limits */
+	dev_info->max_mac_addrs = 1;
+	dev_info->min_rx_bufsize = 0;
+	dev_info->max_rx_pktlen = (uint32_t)ETHER_MAX_VLAN_FRAME_LEN;
+}
+
+/* Copy the per-queue software counters into tap_stats and fill in the
+ * port totals. Only the first RTE_ETHDEV_QUEUE_STAT_CNTRS queues are
+ * reported and summed.
+ */
+static void
+tap_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *tap_stats)
+{
+	const struct pmd_internals *pmd = dev->data->dev_private;
+	unsigned long in_pkts = 0, in_bytes = 0;
+	unsigned long out_pkts = 0, out_bytes = 0, out_errs = 0;
+	unsigned int q, nb_q;
+
+	/* never index past the per-queue arrays of rte_eth_stats */
+	nb_q = pmd->nb_queues;
+	if (nb_q > RTE_ETHDEV_QUEUE_STAT_CNTRS)
+		nb_q = RTE_ETHDEV_QUEUE_STAT_CNTRS;
+
+	for (q = 0; q < nb_q; q++) {
+		/* receive side */
+		tap_stats->q_ipackets[q] = pmd->rxq[q].stats.ipackets;
+		tap_stats->q_ibytes[q] = pmd->rxq[q].stats.ibytes;
+		in_pkts += tap_stats->q_ipackets[q];
+		in_bytes += tap_stats->q_ibytes[q];
+
+		/* transmit side */
+		tap_stats->q_opackets[q] = pmd->txq[q].stats.opackets;
+		tap_stats->q_errors[q] = pmd->txq[q].stats.errs;
+		tap_stats->q_obytes[q] = pmd->txq[q].stats.obytes;
+		out_pkts += tap_stats->q_opackets[q];
+		out_errs += tap_stats->q_errors[q];
+		out_bytes += tap_stats->q_obytes[q];
+	}
+
+	tap_stats->ipackets = in_pkts;
+	tap_stats->ibytes = in_bytes;
+	tap_stats->opackets = out_pkts;
+	tap_stats->oerrors = out_errs;
+	tap_stats->obytes = out_bytes;
+}
+
+/* Zero the counters that tap_stats_get() reports, for every queue. */
+static void
+tap_stats_reset(struct rte_eth_dev *dev)
+{
+	struct pmd_internals *pmd = dev->data->dev_private;
+	int q;
+
+	for (q = 0; q < pmd->nb_queues; q++) {
+		struct pkt_stats *rx = &pmd->rxq[q].stats;
+		struct pkt_stats *tx = &pmd->txq[q].stats;
+
+		rx->ipackets = 0;
+		rx->ibytes = 0;
+
+		tx->opackets = 0;
+		tx->errs = 0;
+		tx->obytes = 0;
+	}
+}
+
+static void
+tap_dev_close(struct rte_eth_dev *dev __rte_unused)
+{	/* empty: nothing is released or changed when the port is closed */
+}
+
+/* Release an RX queue: close its descriptor, if it holds one, and mark the
+ * queue as having none.
+ */
+static void
+tap_rx_queue_release(void *queue)
+{
+	struct rx_queue *rxq = queue;
+
+	/* no queue, or no descriptor attached to it */
+	if (!rxq || rxq->fd <= 0)
+		return;
+
+	close(rxq->fd);
+	rxq->fd = -1;
+}
+
+/* Release a TX queue: close its descriptor, if it holds one, and mark the
+ * queue as having none.
+ */
+static void
+tap_tx_queue_release(void *queue)
+{
+	struct tx_queue *txq = queue;
+
+	/* no queue, or no descriptor attached to it */
+	if (!txq || txq->fd <= 0)
+		return;
+
+	close(txq->fd);
+	txq->fd = -1;
+}
+
+static int
+tap_link_update(struct rte_eth_dev *dev __rte_unused,
+		int wait_to_complete __rte_unused)
+{
+	return 0;	/* link state is not queried here; always returns 0 */
+}
+
+/* Attach queue qid to the TAP interface and publish it in dev->data.
+ *
+ * The descriptor already held by the RX or TX side of the queue is reused;
+ * when neither side has one, a new descriptor is opened with tun_alloc().
+ * Both sides of a queue end up sharing the same descriptor.
+ *
+ * Returns the descriptor, or -1 when tun_alloc() fails.
+ */
+static int
+tap_setup_queue(struct rte_eth_dev *dev,
+		struct pmd_internals *internals,
+		uint16_t qid)
+{
+	struct rx_queue *rx = &internals->rxq[qid];
+	struct tx_queue *tx = &internals->txq[qid];
+	int fd;
+
+	fd = rx->fd;
+	if (fd < 0) {
+		fd = tx->fd;
+		if (fd < 0) {
+			/* A multi-queue TAP gains a queue by opening the same
+			 * interface name again. internals->name holds the TAP
+			 * interface name; dev->data->name is the ethdev name
+			 * and would select a different host interface.
+			 */
+			RTE_LOG(INFO, PMD, "Add queue to TAP %s for qid %d\n",
+				internals->name, qid);
+			fd = tun_alloc(internals->name);
+			if (fd < 0) {
+				RTE_LOG(ERR, PMD, "tun_alloc(%s) failed\n",
+					internals->name);
+				return -1;
+			}
+		}
+	}
+	dev->data->rx_queues[qid] = rx;
+	dev->data->tx_queues[qid] = tx;
+
+	rx->fd = fd;
+	tx->fd = fd;
+
+	return fd;
+}
+
+/* Set up RX queue rx_queue_id: remember the mempool and port id, check
+ * that an Ethernet frame fits into one mbuf of that pool, and attach the
+ * queue to a TAP descriptor.
+ *
+ * Returns 0 on success, -1 for a bad queue id, missing mempool or failed
+ * descriptor setup, and -ENOMEM when the mbuf data room is too small.
+ */
+static int
+tap_rx_queue_setup(struct rte_eth_dev *dev,
+		   uint16_t rx_queue_id,
+		   uint16_t nb_rx_desc __rte_unused,
+		   unsigned int socket_id __rte_unused,
+		   const struct rte_eth_rxconf *rx_conf __rte_unused,
+		   struct rte_mempool *mp)
+{
+	struct pmd_internals *pmd = dev->data->dev_private;
+	struct rx_queue *rxq;
+	uint16_t room;
+	int fd;
+
+	if (!mp || (rx_queue_id >= pmd->nb_queues)) {
+		RTE_LOG(ERR, PMD, "nb_queues %d mp %p\n",
+			pmd->nb_queues, mp);
+		return -1;
+	}
+
+	rxq = &pmd->rxq[rx_queue_id];
+	rxq->mp = mp;
+	rxq->in_port = dev->data->port_id;
+
+	/* Now get the space available for data in the mbuf */
+	room = (uint16_t)(rte_pktmbuf_data_room_size(mp) -
+				RTE_PKTMBUF_HEADROOM);
+	if (room < ETH_FRAME_LEN) {
+		RTE_LOG(ERR, PMD,
+			"%s: %d bytes will not fit in mbuf (%d bytes)\n",
+			dev->data->name, ETH_FRAME_LEN, room);
+		return -ENOMEM;
+	}
+
+	fd = tap_setup_queue(dev, pmd, rx_queue_id);
+	if (fd == -1)
+		return -1;
+
+	/* keep the descriptor in the per-device list as well */
+	pmd->fds[rx_queue_id] = fd;
+	RTE_LOG(INFO, PMD, "RX TAP device name %s, qid %d on fd %d\n",
+		dev->data->name, rx_queue_id, rxq->fd);
+
+	return 0;
+}
+
+/* Set up TX queue tx_queue_id by attaching it to a TAP descriptor.
+ * Returns 0 on success and -1 for a bad queue id or a failed descriptor
+ * setup.
+ */
+static int
+tap_tx_queue_setup(struct rte_eth_dev *dev,
+		   uint16_t tx_queue_id,
+		   uint16_t nb_tx_desc __rte_unused,
+		   unsigned int socket_id __rte_unused,
+		   const struct rte_eth_txconf *tx_conf __rte_unused)
+{
+	struct pmd_internals *pmd = dev->data->dev_private;
+
+	if (tx_queue_id >= pmd->nb_queues)
+		return -1;
+
+	/* the descriptor itself is stored in the queue by the helper */
+	if (tap_setup_queue(dev, pmd, tx_queue_id) == -1)
+		return -1;
+
+	RTE_LOG(INFO, PMD, "TX TAP device name %s, qid %d on fd %d\n",
+		dev->data->name, tx_queue_id, pmd->txq[tx_queue_id].fd);
+
+	return 0;
+}
+
+static const struct eth_dev_ops ops = {	/* installed as dev->dev_ops */
+	.dev_start              = tap_dev_start,
+	.dev_stop               = tap_dev_stop,
+	.dev_close              = tap_dev_close,
+	.dev_configure          = tap_dev_configure,
+	.dev_infos_get          = tap_dev_info,
+	.rx_queue_setup         = tap_rx_queue_setup,
+	.tx_queue_setup         = tap_tx_queue_setup,
+	.rx_queue_release       = tap_rx_queue_release,
+	.tx_queue_release       = tap_tx_queue_release,
+	.link_update            = tap_link_update,
+	.stats_get              = tap_stats_get,
+	.stats_reset            = tap_stats_reset,
+};
+
+/* Give the host interface behind fd a MAC of the form "Tap-" + port id +
+ * numa node, and store a different address ("dnet" + port id + numa node)
+ * in addr for use by the application side.
+ *
+ * Returns 0 on success, -1 on bad arguments or when an ioctl fails.
+ */
+static int
+pmd_mac_address(int fd, struct rte_eth_dev *dev, struct ether_addr *addr)
+{
+	struct ifreq ifr;
+	char *mac = ifr.ifr_hwaddr.sa_data;
+
+	if (fd <= 0 || dev == NULL || addr == NULL)
+		return -1;
+
+	memset(&ifr, 0, sizeof(ifr));
+
+	/* read the current hardware address first */
+	if (ioctl(fd, SIOCGIFHWADDR, &ifr) == -1) {
+		RTE_LOG(ERR, PMD, "ioctl failed (SIOCGIFHWADDR) (%s)\n",
+			ifr.ifr_name);
+		return -1;
+	}
+
+	/* Set the host based MAC address to this special MAC format */
+	mac[0] = 'T';
+	mac[1] = 'a';
+	mac[2] = 'p';
+	mac[3] = '-';
+	mac[4] = dev->data->port_id;
+	mac[5] = dev->data->numa_node;
+	if (ioctl(fd, SIOCSIFHWADDR, &ifr) == -1) {
+		RTE_LOG(ERR, PMD, "%s: ioctl failed (SIOCSIFHWADDR) (%s)\n",
+			dev->data->name, ifr.ifr_name);
+		return -1;
+	}
+
+	/* The local application MAC address needs to be different than the
+	 * host based MAC address, so only the four-byte prefix is replaced.
+	 */
+	mac[0] = 'd';
+	mac[1] = 'n';
+	mac[2] = 'e';
+	mac[3] = 't';
+	mac[4] = dev->data->port_id;
+	mac[5] = dev->data->numa_node;
+	rte_memcpy(addr, mac, ETH_ALEN);
+
+	return 0;
+}
+
+/* Allocate and initialise the ethdev for one TAP interface.
+ *
+ * name is the vdev name used for the ethdev entry. tap_name is the host
+ * interface name to request; tun_alloc() may rewrite it with the name the
+ * host reported, so it must be a writable buffer of RTE_ETH_NAME_MAX_LEN
+ * bytes.
+ *
+ * Returns 0 on success and -EINVAL on any failure; on failure everything
+ * acquired here, including the TAP descriptor, is released again.
+ */
+static int
+eth_dev_tap_create(const char *name, char *tap_name)
+{
+	int numa_node = rte_socket_id();
+	struct rte_eth_dev *dev = NULL;
+	struct pmd_internals *pmd = NULL;
+	struct rte_eth_dev_data *data = NULL;
+	int i, fd = -1;
+
+	RTE_LOG(INFO, PMD,
+		"%s: Create TAP Ethernet device with %d queues on numa %u\n",
+		 name, RTE_PMD_TAP_MAX_QUEUES, rte_socket_id());
+
+	data = rte_zmalloc_socket(tap_name, sizeof(*data), 0, numa_node);
+	if (!data)
+		goto error_exit;
+
+	pmd = rte_zmalloc_socket(tap_name, sizeof(*pmd), 0, numa_node);
+	if (!pmd)
+		goto error_exit;
+
+	/* Use the name and not the tap_name */
+	dev = rte_eth_dev_allocate(name);
+	if (!dev)
+		goto error_exit;
+
+	pmd->nb_queues = RTE_PMD_TAP_MAX_QUEUES;
+
+	/* Setup some default values */
+	data->dev_private = pmd;
+	data->port_id = dev->data->port_id;
+	data->dev_flags = RTE_ETH_DEV_DETACHABLE;
+	data->kdrv = RTE_KDRV_NONE;
+	data->drv_name = drivername;
+	data->numa_node = numa_node;
+
+	data->dev_link = pmd_link;
+	data->mac_addrs = &pmd->eth_addr;
+	data->nb_rx_queues = pmd->nb_queues;
+	data->nb_tx_queues = pmd->nb_queues;
+
+	dev->data = data;
+	dev->dev_ops = &ops;
+	dev->driver = NULL;
+	dev->rx_pkt_burst = pmd_rx_burst;
+	dev->tx_pkt_burst = pmd_tx_burst;
+	snprintf(dev->data->name, sizeof(dev->data->name), "%s", name);
+
+	/* Create the first Tap device */
+	fd = tun_alloc(tap_name);
+	if (fd < 0)
+		goto error_exit;
+
+	/* Record the interface name only now: tun_alloc() may have replaced
+	 * the requested name with the one reported by the host.
+	 */
+	snprintf(pmd->name, sizeof(pmd->name), "%s", tap_name);
+
+	/* Presetup the fds to -1 as being not working */
+	for (i = 0; i < RTE_PMD_TAP_MAX_QUEUES; i++) {
+		pmd->fds[i] = -1;
+		pmd->rxq[i].fd = -1;
+		pmd->txq[i].fd = -1;
+	}
+
+	/* Take the TUN/TAP fd and place in the first location */
+	pmd->rxq[0].fd = fd;
+	pmd->txq[0].fd = fd;
+	pmd->fds[0] = fd;
+
+	if (pmd_mac_address(fd, dev, &pmd->eth_addr) < 0)
+		goto error_exit;
+
+	return 0;
+
+error_exit:
+	RTE_PMD_DEBUG_TRACE("Unable to initialize %s\n", name);
+
+	/* pmd is freed below, so nothing else will ever close this fd */
+	if (fd >= 0)
+		close(fd);
+
+	rte_free(data);
+	rte_free(pmd);
+
+	rte_eth_dev_release_port(dev);
+
+	return -EINVAL;
+}
+
+/* kvargs handler for the "iface" key: copy the requested interface name
+ * into the buffer passed as extra_args, or rebuild the default name from
+ * DEFAULT_TAP_NAME and the current unit when no value was given.
+ * Always returns 0.
+ */
+static int
+set_interface_name(const char *key __rte_unused,
+		   const char *value,
+		   void *extra_args)
+{
+	char *ifname = extra_args;
+
+	if (!value) {
+		/* tap_unit was already advanced for this device */
+		snprintf(ifname, RTE_ETH_NAME_MAX_LEN - 1, "%s%d",
+			 DEFAULT_TAP_NAME, (tap_unit - 1));
+		return 0;
+	}
+
+	snprintf(ifname, RTE_ETH_NAME_MAX_LEN - 1, "%s", value);
+
+	return 0;
+}
+
+/* kvargs handler for the "speed" key: store atoi(value) in the int that
+ * extra_args points to, or ETH_SPEED_NUM_10G when no value was given.
+ * Always returns 0.
+ */
+static int
+set_interface_speed(const char *key __rte_unused,
+		    const char *value,
+		    void *extra_args)
+{
+	int *speed = extra_args;
+
+	if (value)
+		*speed = atoi(value);
+	else
+		*speed = ETH_SPEED_NUM_10G;
+
+	return 0;
+}
+
+/* Open a TAP interface device.
+ *
+ * name is the vdev name; params is the optional devargs string, in which
+ * the "iface" and "speed" keys are recognised. The default interface name
+ * is DEFAULT_TAP_NAME followed by the current unit number.
+ *
+ * Returns 0 on success or a negative value on failure. On failure the unit
+ * number taken for this device is given back.
+ */
+static int
+rte_pmd_tap_probe(const char *name, const char *params)
+{
+	int ret;
+	struct rte_kvargs *kvlist = NULL;
+	int speed;
+	char tap_name[RTE_ETH_NAME_MAX_LEN];
+
+	speed = ETH_SPEED_NUM_10G;
+	snprintf(tap_name, sizeof(tap_name), "%s%d",
+		 DEFAULT_TAP_NAME, tap_unit++);
+
+	RTE_LOG(INFO, PMD, "Initializing pmd_tap for %s as %s\n",
+		name, tap_name);
+
+	if (params && (params[0] != '\0')) {
+		RTE_LOG(INFO, PMD, "paramaters (%s)\n", params);
+
+		kvlist = rte_kvargs_parse(params, valid_arguments);
+		if (kvlist) {
+			if (rte_kvargs_count(kvlist, ETH_TAP_SPEED_ARG) == 1) {
+				ret = rte_kvargs_process(kvlist,
+							 ETH_TAP_SPEED_ARG,
+							 &set_interface_speed,
+							 &speed);
+				if (ret == -1)
+					goto leave;
+			}
+
+			if (rte_kvargs_count(kvlist, ETH_TAP_IFACE_ARG) == 1) {
+				ret = rte_kvargs_process(kvlist,
+							 ETH_TAP_IFACE_ARG,
+							 &set_interface_name,
+							 tap_name);
+				if (ret == -1)
+					goto leave;
+			}
+		}
+	}
+	/* the link template is copied into the port by eth_dev_tap_create() */
+	pmd_link.link_speed = speed;
+
+	ret = eth_dev_tap_create(name, tap_name);
+
+leave:
+	/* eth_dev_tap_create() fails with -EINVAL, not -1, so every negative
+	 * value has to be treated as a failure here.
+	 */
+	if (ret < 0) {
+		RTE_LOG(INFO, PMD, "Failed to create pmd_tap for %s as %s\n",
+			name, tap_name);
+		tap_unit--;		/* Restore the unit number */
+	}
+	rte_kvargs_free(kvlist);
+
+	return ret;
+}
+
+/* Detach a TAP device: close every descriptor still recorded for the port
+ * named name, free its private and ethdev data, and release the port.
+ * Returns 0, also when no port with that name exists.
+ */
+static int
+rte_pmd_tap_remove(const char *name)
+{
+	struct rte_eth_dev *eth_dev;
+	struct pmd_internals *pmd;
+	int q;
+
+	RTE_LOG(INFO, PMD, "Closing TUN/TAP Ethernet device on numa %u\n",
+		rte_socket_id());
+
+	/* find the ethdev entry */
+	eth_dev = rte_eth_dev_allocated(name);
+	if (eth_dev == NULL)
+		return 0;
+
+	pmd = eth_dev->data->dev_private;
+	for (q = 0; q < pmd->nb_queues; q++) {
+		/* -1 marks a slot without descriptor */
+		if (pmd->fds[q] == -1)
+			continue;
+		close(pmd->fds[q]);
+	}
+
+	rte_free(pmd);
+	rte_free(eth_dev->data);
+
+	rte_eth_dev_release_port(eth_dev);
+
+	return 0;
+}
+
+static struct rte_vdev_driver pmd_tap_drv = {
+	.probe = rte_pmd_tap_probe,	/* creates the port for a net_tap vdev */
+	.remove = rte_pmd_tap_remove,	/* closes descriptors, frees the port */
+};
+
+DRIVER_REGISTER_VDEV(net_tap, pmd_tap_drv);
+DRIVER_REGISTER_PARAM_STRING(net_tap, "iface=<string>,speed=N");
diff --git a/drivers/net/tap/rte_pmd_tap_version.map b/drivers/net/tap/rte_pmd_tap_version.map
new file mode 100644
index 0000000..61463bf
--- /dev/null
+++ b/drivers/net/tap/rte_pmd_tap_version.map
@@ -0,0 +1,4 @@
+DPDK_16.11 {
+
+	local: *;
+};
diff --git a/mk/rte.app.mk b/mk/rte.app.mk
index 72c2fe7..f20fc1a 100644
--- a/mk/rte.app.mk
+++ b/mk/rte.app.mk
@@ -124,6 +124,7 @@ _LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_PCAP)       += -lrte_pmd_pcap -lpcap
 _LDLIBS-$(CONFIG_RTE_LIBRTE_QEDE_PMD)       += -lrte_pmd_qede -lz
 _LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_RING)       += -lrte_pmd_ring
 _LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_SZEDATA2)   += -lrte_pmd_szedata2 -lsze2
+_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_TAP)        += -lrte_pmd_tap
 _LDLIBS-$(CONFIG_RTE_LIBRTE_THUNDERX_NICVF_PMD) += -lrte_pmd_thunderx_nicvf -lm
 _LDLIBS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD)     += -lrte_pmd_virtio
 ifeq ($(CONFIG_RTE_LIBRTE_VHOST),y)
-- 
2.8.0.GIT

^ permalink raw reply related	[flat|nested] 59+ messages in thread

* Re: [PATCH v9] drivers/net:new PMD using tun/tap host interface
  2016-10-13 22:03     ` [PATCH v9] " Keith Wiles
@ 2016-10-14  6:41       ` Mcnamara, John
  2016-10-14  9:39       ` Ferruh Yigit
                         ` (5 subsequent siblings)
  6 siblings, 0 replies; 59+ messages in thread
From: Mcnamara, John @ 2016-10-14  6:41 UTC (permalink / raw)
  To: Wiles, Keith, dev; +Cc: pmatilai, yuanhan.liu, Yigit, Ferruh



> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Keith Wiles
> Sent: Thursday, October 13, 2016 11:04 PM
> To: dev@dpdk.org
> Cc: pmatilai@redhat.com; yuanhan.liu@linux.intel.com; Yigit, Ferruh
> <ferruh.yigit@intel.com>
> Subject: [dpdk-dev] [PATCH v9] drivers/net:new PMD using tun/tap host
> interface
> 
> The rte_eth_tap.c PMD creates a device using TUN/TAP interfaces on the
> local host. The PMD allows for DPDK and the host to communicate using a
> raw device interface on the host and in the DPDK application. The device
> created is a Tap device with a L2 packet header.
> 
> v9 - Fix up the docs to use correct syntax
> v8 - Fix issue with tap_tx_queue_setup() not return zero on success.
> v7 - Reword the comment in common_base and fix the data->name issue
> v6 - fixed the checkpatch issues
> v5 - merge in changes from list review see related emails
>      fixed many minor edits
> v4 - merge with latest driver changes
> v3 - fix includes by removing ifdef for other type besides Linux
>      Fix the copyright notice in the Makefile
> v2 - merge all of the patches into one patch
>      Fix a typo on naming the tap device
>      Update the maintainers list
> 
> Signed-off-by: Keith Wiles <keith.wiles@intel.com>

For the doc part of the patch:

Acked-by: John McNamara <john.mcnamara@intel.com>

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [PATCH v9] drivers/net:new PMD using tun/tap host interface
  2016-10-13 22:03     ` [PATCH v9] " Keith Wiles
  2016-10-14  6:41       ` Mcnamara, John
@ 2016-10-14  9:39       ` Ferruh Yigit
  2016-11-21 12:56       ` Ferruh Yigit
                         ` (4 subsequent siblings)
  6 siblings, 0 replies; 59+ messages in thread
From: Ferruh Yigit @ 2016-10-14  9:39 UTC (permalink / raw)
  To: Keith Wiles, dev; +Cc: pmatilai, yuanhan.liu

On 10/13/2016 11:03 PM, Keith Wiles wrote:
> The rte_eth_tap.c PMD creates a device using TUN/TAP interfaces
> on the local host. The PMD allows for DPDK and the host to
> communicate using a raw device interface on the host and in
> the DPDK application. The device created is a Tap device with
> a L2 packet header.
> 
> v9 - Fix up the docs to use correct syntax
> v8 - Fix issue with tap_tx_queue_setup() not return zero on success.
> v7 - Reword the comment in common_base and fix the data->name issue
> v6 - fixed the checkpatch issues
> v5 - merge in changes from list review see related emails
>      fixed many minor edits
> v4 - merge with latest driver changes
> v3 - fix includes by removing ifdef for other type besides Linux
>      Fix the copyright notice in the Makefile
> v2 - merge all of the patches into one patch
>      Fix a typo on naming the tap device
>      Update the maintainers list
> 
> Signed-off-by: Keith Wiles <keith.wiles@intel.com>
> ---

Reviewed-by: Ferruh Yigit <ferruh.yigit@intel.com>

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [PATCH v9] drivers/net:new PMD using tun/tap host interface
  2016-10-13 22:03     ` [PATCH v9] " Keith Wiles
  2016-10-14  6:41       ` Mcnamara, John
  2016-10-14  9:39       ` Ferruh Yigit
@ 2016-11-21 12:56       ` Ferruh Yigit
  2016-11-25 19:38         ` Aws Ismail
  2016-12-07 19:38       ` [PATCH v10] drivers/net:new TUN/TAP device PMD Keith Wiles
                         ` (3 subsequent siblings)
  6 siblings, 1 reply; 59+ messages in thread
From: Ferruh Yigit @ 2016-11-21 12:56 UTC (permalink / raw)
  To: Keith Wiles, dev; +Cc: pmatilai, yuanhan.liu

On 10/13/2016 11:03 PM, Keith Wiles wrote:
> The rte_eth_tap.c PMD creates a device using TUN/TAP interfaces
> on the local host. The PMD allows for DPDK and the host to
> communicate using a raw device interface on the host and in
> the DPDK application. The device created is a Tap device with
> a L2 packet header.
> 
> v9 - Fix up the docs to use correct syntax
> v8 - Fix issue with tap_tx_queue_setup() not return zero on success.
> v7 - Reword the comment in common_base and fix the data->name issue
> v6 - fixed the checkpatch issues
> v5 - merge in changes from list review see related emails
>      fixed many minor edits
> v4 - merge with latest driver changes
> v3 - fix includes by removing ifdef for other type besides Linux
>      Fix the copyright notice in the Makefile
> v2 - merge all of the patches into one patch
>      Fix a typo on naming the tap device
>      Update the maintainers list
> 
> Signed-off-by: Keith Wiles <keith.wiles@intel.com>
> ---

Just a reminder, this is a new PMD and waiting for community review.

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [PATCH v9] drivers/net:new PMD using tun/tap host interface
  2016-11-21 12:56       ` Ferruh Yigit
@ 2016-11-25 19:38         ` Aws Ismail
  2016-11-29 21:36           ` Aws Ismail
  0 siblings, 1 reply; 59+ messages in thread
From: Aws Ismail @ 2016-11-25 19:38 UTC (permalink / raw)
  To: Ferruh Yigit; +Cc: Keith Wiles, DPDK, pmatilai, yuanhan.liu

Keith,

This won't build when integrated with v16.11. The register macro
prefix has been renamed. a v10 is needed.

diff --git a/drivers/net/tap/rte_eth_tap.c b/drivers/net/tap/rte_eth_tap.c
index 7f303db..297d4b6 100644
--- a/drivers/net/tap/rte_eth_tap.c
+++ b/drivers/net/tap/rte_eth_tap.c
@@ -752,5 +752,6 @@ static struct rte_vdev_driver pmd_tap_drv = {
        .remove = rte_pmd_tap_remove,
 };

-DRIVER_REGISTER_VDEV(net_tap, pmd_tap_drv);
-DRIVER_REGISTER_PARAM_STRING(net_tap, "iface=<string>,speed=N");
+RTE_PMD_REGISTER_VDEV(net_tap, pmd_tap_drv);
+RTE_PMD_REGISTER_ALIAS(net_tap, eth_tap);
+RTE_PMD_REGISTER_PARAM_STRING(net_tap, "iface=<string>,speed=N");

On Mon, Nov 21, 2016 at 7:56 AM, Ferruh Yigit <ferruh.yigit@intel.com> wrote:
> On 10/13/2016 11:03 PM, Keith Wiles wrote:
>> The rte_eth_tap.c PMD creates a device using TUN/TAP interfaces
>> on the local host. The PMD allows for DPDK and the host to
>> communicate using a raw device interface on the host and in
>> the DPDK application. The device created is a Tap device with
>> a L2 packet header.
>>
>> v9 - Fix up the docs to use correct syntax
>> v8 - Fix issue with tap_tx_queue_setup() not return zero on success.
>> v7 - Reword the comment in common_base and fix the data->name issue
>> v6 - fixed the checkpatch issues
>> v5 - merge in changes from list review see related emails
>>      fixed many minor edits
>> v4 - merge with latest driver changes
>> v3 - fix includes by removing ifdef for other type besides Linux
>>      Fix the copyright notice in the Makefile
>> v2 - merge all of the patches into one patch
>>      Fix a typo on naming the tap device
>>      Update the maintainers list
>>
>> Signed-off-by: Keith Wiles <keith.wiles@intel.com>
>> ---
>
> Just a reminder, this is a new PMD and waiting for community review.

^ permalink raw reply related	[flat|nested] 59+ messages in thread

* Re: [PATCH v9] drivers/net:new PMD using tun/tap host interface
  2016-11-25 19:38         ` Aws Ismail
@ 2016-11-29 21:36           ` Aws Ismail
  2016-11-29 22:16             ` Wiles, Keith
  0 siblings, 1 reply; 59+ messages in thread
From: Aws Ismail @ 2016-11-29 21:36 UTC (permalink / raw)
  To: Keith Wiles; +Cc: Ferruh Yigit, DPDK

I have verified that adding just a single tap device works with testpmd.
But as soon as I try more than one tap device, I would get a coredump, e.g.:

root@localhost:~# testpmd -c f -n 4 --socket-mem 512 --vdev=net_tap,iface=tap0 --vdev=net_tap,iface=tap1 -- -i
EAL: Detected 16 lcore(s)
EAL: Probing VFIO support...
EAL: VFIO support initialized
EAL: cannot open /proc/self/numa_maps, consider that all memory is in
socket_id 0
PMD: Initializing pmd_tap for net_tap as dtap0
PMD: net_tap: Create TAP Ethernet device with 32 queues on numa 0
PMD: Initializing pmd_tap for net_tap as dtap1
PMD: net_tap: Create TAP Ethernet device with 32 queues on numa 0
EAL: failed to initialize net_tap device
PANIC in rte_eal_init():
Cannot init pmd devices
6: [testpmd() [0x409149]]
5: [/lib/libc.so.6(__libc_start_main+0xf0) [0x7f3e65fa8740]]
4: [testpmd() [0x408b21]]
3: [/usr/lib/librte_eal.so.3.1(rte_eal_init+0xe09) [0x7f3e68ceaea9]]
2: [/usr/lib/librte_eal.so.3.1(__rte_panic+0xc0) [0x7f3e68ce9b5a]]
1: [/usr/lib/librte_eal.so.3.1(rte_dump_stack+0x18) [0x7f3e68cf2078]]
Aborted (core dumped)

root@localhost:~#


On Fri, Nov 25, 2016 at 2:38 PM, Aws Ismail <aws.ismail@gmail.com> wrote:

> Keith,
>
> This won't build when integrated with v16.11. The register macro
> prefix has been renamed. a v10 is needed.
>
> diff --git a/drivers/net/tap/rte_eth_tap.c b/drivers/net/tap/rte_eth_tap.c
> index 7f303db..297d4b6 100644
> --- a/drivers/net/tap/rte_eth_tap.c
> +++ b/drivers/net/tap/rte_eth_tap.c
> @@ -752,5 +752,6 @@ static struct rte_vdev_driver pmd_tap_drv = {
>         .remove = rte_pmd_tap_remove,
>  };
>
> -DRIVER_REGISTER_VDEV(net_tap, pmd_tap_drv);
> -DRIVER_REGISTER_PARAM_STRING(net_tap, "iface=<string>,speed=N");
> +RTE_PMD_REGISTER_VDEV(net_tap, pmd_tap_drv);
> +RTE_PMD_REGISTER_ALIAS(net_tap, eth_tap);
> +RTE_PMD_REGISTER_PARAM_STRING(net_tap, "iface=<string>,speed=N");
>
> On Mon, Nov 21, 2016 at 7:56 AM, Ferruh Yigit <ferruh.yigit@intel.com>
> wrote:
> > On 10/13/2016 11:03 PM, Keith Wiles wrote:
> >> The rte_eth_tap.c PMD creates a device using TUN/TAP interfaces
> >> on the local host. The PMD allows for DPDK and the host to
> >> communicate using a raw device interface on the host and in
> >> the DPDK application. The device created is a Tap device with
> >> a L2 packet header.
> >>
> >> v9 - Fix up the docs to use correct syntax
> >> v8 - Fix issue with tap_tx_queue_setup() not return zero on success.
> >> v7 - Reword the comment in common_base and fix the data->name issue
> >> v6 - fixed the checkpatch issues
> >> v5 - merge in changes from list review see related emails
> >>      fixed many minor edits
> >> v4 - merge with latest driver changes
> >> v3 - fix includes by removing ifdef for other type besides Linux
> >>      Fix the copyright notice in the Makefile
> >> v2 - merge all of the patches into one patch
> >>      Fix a typo on naming the tap device
> >>      Update the maintainers list
> >>
> >> Signed-off-by: Keith Wiles <keith.wiles@intel.com>
> >> ---
> >
> > Just a reminder, this is a new PMD and waiting for community review.
>

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [PATCH v9] drivers/net:new PMD using tun/tap host interface
  2016-11-29 21:36           ` Aws Ismail
@ 2016-11-29 22:16             ` Wiles, Keith
  0 siblings, 0 replies; 59+ messages in thread
From: Wiles, Keith @ 2016-11-29 22:16 UTC (permalink / raw)
  To: Aws Ismail; +Cc: Yigit, Ferruh, DPDK


> On Nov 29, 2016, at 3:36 PM, Aws Ismail <aws.ismail@gmail.com> wrote:
> 
> I have verified that adding just a single tap device works with testpmd. But as soon as I try more than one tap device, I would get a coredump, e.g.:
> 
> root@​localhost:~# testpmd -c f -n 4 --socket-mem 512 --vdev=net_tap​,iface=tap0​ --vdev=net_tap​,iface=tap1​ -- -i
> EAL: Detected 16 lcore(s)
> EAL: Probing VFIO support...
> EAL: VFIO support initialized
> EAL: cannot open /proc/self/numa_maps, consider that all memory is in socket_id 0
> PMD: Initializing pmd_tap for net_tap as dtap0
> PMD: net_tap: Create TAP Ethernet device with 32 queues on numa 0
> PMD: Initializing pmd_tap for net_tap as dtap1
> PMD: net_tap: Create TAP Ethernet device with 32 queues on numa 0
> EAL: failed to initialize net_tap device
> PANIC in rte_eal_init():
> Cannot init pmd devices
> 6: [testpmd() [0x409149]]
> 5: [/lib/libc.so.6(__libc_start_main+0xf0) [0x7f3e65fa8740]]
> 4: [testpmd() [0x408b21]]
> 3: [/usr/lib/librte_eal.so.3.1(rte_eal_init+0xe09) [0x7f3e68ceaea9]]
> 2: [/usr/lib/librte_eal.so.3.1(__rte_panic+0xc0) [0x7f3e68ce9b5a]]
> 1: [/usr/lib/librte_eal.so.3.1(rte_dump_stack+0x18) [0x7f3e68cf2078]]

It appears the call to rte_eth_dev_allocate() in the code is wrong. I did pass in the variable called tap_name created in the function, but I was told I needed to pass in ‘name’ that was passed into the function. The rte_eth_dev_allocate() needs a unique name for each call and name was always the same.

Need to change that line to use tap_name instead or tell me the real way to handle this problem.

If you want a new patch I can try to get it done, but I am working on something else at this time and it could be a few days before I can get the patch out.

> Aborted (core dumped)
> 
> root@​localhost​:~#
> 
> 
> On Fri, Nov 25, 2016 at 2:38 PM, Aws Ismail <aws.ismail@gmail.com> wrote:
> Keith,
> 
> This won't build when integrated with v16.11. The register macro
> prefix has been renamed. a v10 is needed.
> 
> diff --git a/drivers/net/tap/rte_eth_tap.c b/drivers/net/tap/rte_eth_tap.c
> index 7f303db..297d4b6 100644
> --- a/drivers/net/tap/rte_eth_tap.c
> +++ b/drivers/net/tap/rte_eth_tap.c
> @@ -752,5 +752,6 @@ static struct rte_vdev_driver pmd_tap_drv = {
>         .remove = rte_pmd_tap_remove,
>  };
> 
> -DRIVER_REGISTER_VDEV(net_tap, pmd_tap_drv);
> -DRIVER_REGISTER_PARAM_STRING(net_tap, "iface=<string>,speed=N");
> +RTE_PMD_REGISTER_VDEV(net_tap, pmd_tap_drv);
> +RTE_PMD_REGISTER_ALIAS(net_tap, eth_tap);
> +RTE_PMD_REGISTER_PARAM_STRING(net_tap, "iface=<string>,speed=N");
> 
> On Mon, Nov 21, 2016 at 7:56 AM, Ferruh Yigit <ferruh.yigit@intel.com> wrote:
> > On 10/13/2016 11:03 PM, Keith Wiles wrote:
> >> The rte_eth_tap.c PMD creates a device using TUN/TAP interfaces
> >> on the local host. The PMD allows for DPDK and the host to
> >> communicate using a raw device interface on the host and in
> >> the DPDK application. The device created is a Tap device with
> >> a L2 packet header.
> >>
> >> v9 - Fix up the docs to use correct syntax
> >> v8 - Fix issue with tap_tx_queue_setup() not return zero on success.
> >> v7 - Reword the comment in common_base and fix the data->name issue
> >> v6 - fixed the checkpatch issues
> >> v5 - merge in changes from list review see related emails
> >>      fixed many minor edits
> >> v4 - merge with latest driver changes
> >> v3 - fix includes by removing ifdef for other type besides Linux
> >>      Fix the copyright notice in the Makefile
> >> v2 - merge all of the patches into one patch
> >>      Fix a typo on naming the tap device
> >>      Update the maintainers list
> >>
> >> Signed-off-by: Keith Wiles <keith.wiles@intel.com>
> >> ---
> >
> > Just a reminder, this is a new PMD and waiting for community review.
> 

Regards,
Keith


^ permalink raw reply	[flat|nested] 59+ messages in thread

* [PATCH v10] drivers/net:new TUN/TAP device PMD
  2016-10-13 22:03     ` [PATCH v9] " Keith Wiles
                         ` (2 preceding siblings ...)
  2016-11-21 12:56       ` Ferruh Yigit
@ 2016-12-07 19:38       ` Keith Wiles
  2016-12-07 20:15         ` Aws Ismail
  2016-12-09 18:16         ` Ferruh Yigit
  2016-12-09 19:05       ` [PATCH v11] " Keith Wiles
                         ` (2 subsequent siblings)
  6 siblings, 2 replies; 59+ messages in thread
From: Keith Wiles @ 2016-12-07 19:38 UTC (permalink / raw)
  To: dev; +Cc: pmatilai, yuanhan.liu, ferruh.yigit, john.mcnamara

The PMD allows for DPDK and the host to communicate using a raw
device interface on the host and in the DPDK application. The device
created is a Tap device with a L2 packet header.

v10- Change the string name used to allow for multiple devices.
v9 - Fix up the docs to use correct syntax
v8 - Fix issue with tap_tx_queue_setup() not return zero on success.
v7 - Reword the comment in common_base and fix the data->name issue
v6 - fixed the checkpatch issues
v5 - merge in changes from list review see related emails
     fixed many minor edits
v4 - merge with latest driver changes
v3 - fix includes by removing ifdef for other type besides Linux
     Fix the copyright notice in the Makefile
v2 - merge all of the patches into one patch
     Fix a typo on naming the tap device
     Update the maintainers list

Signed-off-by: Keith Wiles <keith.wiles@intel.com>
---
 MAINTAINERS                             |   5 +
 config/common_base                      |   9 +
 config/common_linuxapp                  |   1 +
 doc/guides/nics/tap.rst                 | 136 ++++++
 drivers/net/Makefile                    |   1 +
 drivers/net/tap/Makefile                |  57 +++
 drivers/net/tap/rte_eth_tap.c           | 765 ++++++++++++++++++++++++++++++++
 drivers/net/tap/rte_pmd_tap_version.map |   4 +
 mk/rte.app.mk                           |   1 +
 9 files changed, 979 insertions(+)
 create mode 100644 doc/guides/nics/tap.rst
 create mode 100644 drivers/net/tap/Makefile
 create mode 100644 drivers/net/tap/rte_eth_tap.c
 create mode 100644 drivers/net/tap/rte_pmd_tap_version.map

diff --git a/MAINTAINERS b/MAINTAINERS
index 26d9590..842fb6d 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -398,6 +398,11 @@ F: doc/guides/nics/pcap_ring.rst
 F: app/test/test_pmd_ring.c
 F: app/test/test_pmd_ring_perf.c
 
+Tap PMD
+M: Keith Wiles <keith.wiles@intel.com>
+F: drivers/net/tap
+F: doc/guides/nics/tap.rst
+
 Null Networking PMD
 M: Tetsuya Mukawa <mtetsuyah@gmail.com>
 F: drivers/net/null/
diff --git a/config/common_base b/config/common_base
index 652a839..eb51cdb 100644
--- a/config/common_base
+++ b/config/common_base
@@ -590,3 +590,12 @@ CONFIG_RTE_APP_TEST_RESOURCE_TAR=n
 CONFIG_RTE_TEST_PMD=y
 CONFIG_RTE_TEST_PMD_RECORD_CORE_CYCLES=n
 CONFIG_RTE_TEST_PMD_RECORD_BURST_STATS=n
+
+#
+# Compile the TAP PMD
+#
+# The TAP PMD is currently only built for Linux and the
+# option is enabled by default in common_linuxapp file,
+# set to 'n' in the common_base file.
+#
+CONFIG_RTE_LIBRTE_PMD_TAP=n
diff --git a/config/common_linuxapp b/config/common_linuxapp
index 2483dfa..782b503 100644
--- a/config/common_linuxapp
+++ b/config/common_linuxapp
@@ -44,3 +44,4 @@ CONFIG_RTE_LIBRTE_PMD_VHOST=y
 CONFIG_RTE_LIBRTE_PMD_AF_PACKET=y
 CONFIG_RTE_LIBRTE_POWER=y
 CONFIG_RTE_VIRTIO_USER=y
+CONFIG_RTE_LIBRTE_PMD_TAP=y
diff --git a/doc/guides/nics/tap.rst b/doc/guides/nics/tap.rst
new file mode 100644
index 0000000..622b9e7
--- /dev/null
+++ b/doc/guides/nics/tap.rst
@@ -0,0 +1,136 @@
+..  BSD LICENSE
+    Copyright(c) 2016 Intel Corporation. All rights reserved.
+    All rights reserved.
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions
+    are met:
+
+    * Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+    notice, this list of conditions and the following disclaimer in
+    the documentation and/or other materials provided with the
+    distribution.
+    * Neither the name of Intel Corporation nor the names of its
+    contributors may be used to endorse or promote products derived
+    from this software without specific prior written permission.
+
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+Tun/Tap Poll Mode Driver
+========================
+
+The ``rte_eth_tap.c`` PMD creates a device using TUN/TAP interfaces on the
+local host. The PMD allows for DPDK and the host to communicate using a raw
+device interface on the host and in the DPDK application.
+
+The device created is a TAP device, which sends/receives packet in a raw
+format with a L2 header. The usage for a TAP PMD is for connectivity to the
+local host using a TAP interface. When the TAP PMD is initialized it will
+create a number of tap devices in the host accessed via ``ifconfig -a`` or
+``ip`` command. The commands can be used to assign and query the virtual like
+device.
+
+These TAP interfaces can be used with Wireshark or tcpdump or Pktgen-DPDK
+along with being able to be used as a network connection to the DPDK
+application. The method to enable one or more interfaces is to use the
+``--vdev=net_tap`` option on the DPDK application command line. Each
+``--vdev=net_tap`` option given will create an interface named dtap0, dtap1,
+and so on.
+
+The interface name can be changed by adding the ``iface=foo0``, for example::
+
+   --vdev=net_tap,iface=foo0 --vdev=net_tap,iface=foo1, ...
+
+Also the speed of the interface can be changed from 10G to whatever number
+needed, but the interface does not enforce that speed, for example::
+
+   --vdev=net_tap,iface=foo0,speed=25000
+
+After the DPDK application is started you can send and receive packets on the
+interface using the standard rx_burst/tx_burst APIs in DPDK. From the host
+point of view you can use any host tool like tcpdump, Wireshark, ping, Pktgen
+and others to communicate with the DPDK application. The DPDK application may
+not understand network protocols like IPv4/6, UDP or TCP unless the
+application has been written to understand these protocols.
+
+If you need the interface to act as a real network interface, meaning it is
+running and has a valid IP address, then you can use the following commands::
+
+   sudo ip link set dtap0 up; sudo ip addr add 192.168.0.250/24 dev dtap0
+   sudo ip link set dtap1 up; sudo ip addr add 192.168.1.250/24 dev dtap1
+
+Please change the IP addresses as you see fit.
+
+If routing is enabled on the host you can also communicate with the DPDK App
+over the internet via a standard socket layer application as long as you
+account for the protocol handling in the application.
+
+If you have a Network Stack in your DPDK application or something like it you
+can utilize that stack to handle the network protocols. Plus you would be able
+to address the interface using an IP address assigned to the internal
+interface.
+
+Example
+-------
+
+The following is a simple example of using the TUN/TAP PMD with the Pktgen
+packet generator. It requires that the ``socat`` utility is installed on the
+test system.
+
+Build DPDK, then pull down Pktgen and build pktgen using the DPDK SDK/Target
+used to build the dpdk you pulled down.
+
+Run pktgen from the pktgen directory in a terminal with a commandline like the
+following::
+
+    sudo ./app/app/x86_64-native-linuxapp-gcc/app/pktgen -l 1-5 -n 4        \
+     --proc-type auto --log-level 8 --socket-mem 512,512 --file-prefix pg   \
+     --vdev=net_tap --vdev=net_tap -b 05:00.0 -b 05:00.1                    \
+     -b 04:00.0 -b 04:00.1 -b 04:00.2 -b 04:00.3                            \
+     -b 81:00.0 -b 81:00.1 -b 81:00.2 -b 81:00.3                            \
+     -b 82:00.0 -b 83:00.0 -- -T -P -m [2:3].0 -m [4:5].1                   \
+     -f themes/black-yellow.theme
+
+.. Note::
+
+   Change the ``-b`` options to blacklist all of your physical ports. The
+   following command line is all one line.
+
+   Also, ``-f themes/black-yellow.theme`` is optional if the default colors
+   work on your system configuration. See the Pktgen docs for more
+   information.
+
+Verify with the ``ifconfig -a`` command in a different xterm window; the
+``dtap0`` and ``dtap1`` interfaces should have been created.
+
+Next set the links for the two interfaces to up via the commands below::
+
+    sudo ip link set dtap0 up; sudo ip addr add 192.168.0.250/24 dev dtap0
+    sudo ip link set dtap1 up; sudo ip addr add 192.168.1.250/24 dev dtap1
+
+Then use socat to create a loopback for the two interfaces::
+
+    sudo socat interface:dtap0 interface:dtap1
+
+Then on the Pktgen command line interface you can start sending packets using
+the commands ``start 0`` and ``start 1`` or you can start both at the same
+time with ``start all``. The command ``str`` is an alias for ``start all`` and
+``stp`` is an alias for ``stop all``.
+
+While running you should see the 64 byte counters increasing to verify the
+traffic is being looped back. You can use ``set all size XXX`` to change the
+size of the packets after you stop the traffic. Use the pktgen ``help``
+command to see a list of all commands. You can also use the ``-f`` option to
+load commands at startup.
diff --git a/drivers/net/Makefile b/drivers/net/Makefile
index bc93230..e366a85 100644
--- a/drivers/net/Makefile
+++ b/drivers/net/Makefile
@@ -51,6 +51,7 @@ DIRS-$(CONFIG_RTE_LIBRTE_PMD_PCAP) += pcap
 DIRS-$(CONFIG_RTE_LIBRTE_QEDE_PMD) += qede
 DIRS-$(CONFIG_RTE_LIBRTE_PMD_RING) += ring
 DIRS-$(CONFIG_RTE_LIBRTE_PMD_SZEDATA2) += szedata2
+DIRS-$(CONFIG_RTE_LIBRTE_PMD_TAP) += tap
 DIRS-$(CONFIG_RTE_LIBRTE_THUNDERX_NICVF_PMD) += thunderx
 DIRS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio
 DIRS-$(CONFIG_RTE_LIBRTE_VMXNET3_PMD) += vmxnet3
diff --git a/drivers/net/tap/Makefile b/drivers/net/tap/Makefile
new file mode 100644
index 0000000..e18f30c
--- /dev/null
+++ b/drivers/net/tap/Makefile
@@ -0,0 +1,57 @@
+#   BSD LICENSE
+#
+#   Copyright(c) 2016 Intel Corporation. All rights reserved.
+#
+#   Redistribution and use in source and binary forms, with or without
+#   modification, are permitted provided that the following conditions
+#   are met:
+#
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice, this list of conditions and the following disclaimer in
+#       the documentation and/or other materials provided with the
+#       distribution.
+#     * Neither the name of Intel Corporation nor the names of its
+#       contributors may be used to endorse or promote products derived
+#       from this software without specific prior written permission.
+#
+#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+#   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+#   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+#   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+#   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+#
+# library name
+#
+LIB = librte_pmd_tap.a
+
+EXPORT_MAP := rte_pmd_tap_version.map
+
+LIBABIVER := 1
+
+CFLAGS += -O3
+CFLAGS += $(WERROR_FLAGS)
+
+#
+# all source are stored in SRCS-y
+#
+SRCS-$(CONFIG_RTE_LIBRTE_PMD_TAP) += rte_eth_tap.c
+
+# this lib depends upon:
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_TAP) += lib/librte_eal
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_TAP) += lib/librte_mbuf
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_TAP) += lib/librte_mempool
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_TAP) += lib/librte_ether
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_TAP) += lib/librte_kvargs
+
+include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/drivers/net/tap/rte_eth_tap.c b/drivers/net/tap/rte_eth_tap.c
new file mode 100644
index 0000000..d5e2fc3
--- /dev/null
+++ b/drivers/net/tap/rte_eth_tap.c
@@ -0,0 +1,765 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2016 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <rte_mbuf.h>
+#include <rte_ethdev.h>
+#include <rte_malloc.h>
+#include <rte_vdev.h>
+#include <rte_kvargs.h>
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/socket.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <unistd.h>
+#include <poll.h>
+#include <arpa/inet.h>
+#include <linux/if.h>
+#include <linux/if_tun.h>
+#include <linux/if_ether.h>
+#include <fcntl.h>
+
+/* Linux based path to the TUN device */
+#define TUN_TAP_DEV_PATH        "/dev/net/tun"
+/* Prefix of the default interface name; probe appends the unit number */
+#define DEFAULT_TAP_NAME        "dtap"
+
+/* Keys accepted in the vdev parameter string */
+#define ETH_TAP_IFACE_ARG       "iface"
+#define ETH_TAP_SPEED_ARG       "speed"
+
+/* Queue count given to every port and size of the per-port fd/queue arrays */
+#define RTE_PMD_TAP_MAX_QUEUES	16
+
+/* NULL-terminated key list handed to rte_kvargs_parse() */
+static const char *valid_arguments[] = {
+	ETH_TAP_IFACE_ARG,
+	ETH_TAP_SPEED_ARG,
+	NULL
+};
+
+/* Driver name reported through dev_info and stored in the ethdev data */
+static const char *drivername = "Tap PMD";
+/* Next unit number used to build the default "dtapN" interface name */
+static int tap_unit;
+
+/* Link template copied into each new port; link_speed is overwritten by
+ * probe with the "speed" argument (10G when the argument is absent).
+ */
+static struct rte_eth_link pmd_link = {
+	.link_speed = ETH_SPEED_NUM_10G,
+	.link_duplex = ETH_LINK_FULL_DUPLEX,
+	.link_status = ETH_LINK_DOWN,
+	.link_autoneg = ETH_LINK_SPEED_AUTONEG
+};
+
+/* Packet, byte and error counters kept for one queue */
+struct pkt_stats {
+	uint64_t opackets;		/* Number of output packets */
+	uint64_t ipackets;		/* Number of input packets */
+	uint64_t obytes;		/* Number of bytes on output */
+	uint64_t ibytes;		/* Number of bytes on input */
+	uint64_t errs;			/* Number of error packets */
+};
+
+/* Receive side state of one queue */
+struct rx_queue {
+	struct rte_mempool *mp;		/* Mempool for RX packets */
+	uint16_t in_port;		/* Port ID */
+	int fd;				/* TAP descriptor read by RX burst */
+
+	struct pkt_stats stats;		/* Stats for this RX queue */
+};
+
+/* Transmit side state of one queue */
+struct tx_queue {
+	int fd;				/* TAP descriptor written by TX burst */
+	struct pkt_stats stats;		/* Stats for this TX queue */
+};
+
+/* Private data of one TAP port (stored in dev->data->dev_private) */
+struct pmd_internals {
+	char name[RTE_ETH_NAME_MAX_LEN];	/* Internal Tap device name */
+	uint16_t nb_queues;		/* Number of queues supported */
+	struct ether_addr eth_addr;	/* Mac address of the device port */
+
+	int if_index;			/* IF_INDEX for the port */
+	/* Descriptors closed on stop/remove; -1 marks an unused slot */
+	int fds[RTE_PMD_TAP_MAX_QUEUES]; /* List of all file descriptors */
+
+	struct rx_queue rxq[RTE_PMD_TAP_MAX_QUEUES];	/* List of RX queues */
+	struct tx_queue txq[RTE_PMD_TAP_MAX_QUEUES];	/* List of TX queues */
+};
+
+/* Tun/Tap allocation routine
+ *
+ * Opens the TUN/TAP clone device, checks the features advertised by the
+ * kernel, attaches the descriptor to a TAP interface (IFF_TAP | IFF_NO_PI)
+ * and switches the descriptor to non-blocking mode.
+ *
+ * name is the name of the interface to use; when it is NULL or empty the
+ * host supplies the name. If the resulting interface name differs from the
+ * requested one it is copied back into name.
+ *
+ * Returns the open file descriptor on success, -1 on failure.
+ */
+static int
+tun_alloc(char *name)
+{
+	struct ifreq ifr;
+	unsigned int features;
+	int fd;
+
+	memset(&ifr, 0, sizeof(struct ifreq));
+
+	ifr.ifr_flags = IFF_TAP | IFF_NO_PI;
+	/* snprintf() always NUL-terminates, whereas strncpy() leaves ifr_name
+	 * unterminated when name is IFNAMSIZ bytes or longer.
+	 */
+	if (name && name[0])
+		snprintf(ifr.ifr_name, IFNAMSIZ, "%s", name);
+
+	fd = open(TUN_TAP_DEV_PATH, O_RDWR);
+	if (fd < 0) {
+		RTE_LOG(ERR, PMD, "Unable to create TAP interface\n");
+		goto error;
+	}
+
+	/* Grab the TUN features to verify we can work */
+	if (ioctl(fd, TUNGETFEATURES, &features) < 0) {
+		RTE_LOG(ERR, PMD, "Unable to get TUN/TAP features\n");
+		goto error;
+	}
+	RTE_LOG(DEBUG, PMD, "TUN/TAP Features %08x\n", features);
+
+	if (!(features & IFF_MULTI_QUEUE) && (RTE_PMD_TAP_MAX_QUEUES > 1)) {
+		/* The driver is built for several queues but the kernel
+		 * cannot provide them.
+		 */
+		RTE_LOG(DEBUG, PMD, "TUN/TAP device only one queue\n");
+		goto error;
+	} else if ((features & IFF_ONE_QUEUE) &&
+			(RTE_PMD_TAP_MAX_QUEUES == 1)) {
+		ifr.ifr_flags |= IFF_ONE_QUEUE;
+		RTE_LOG(DEBUG, PMD, "Single queue only support\n");
+	} else {
+		ifr.ifr_flags |= IFF_MULTI_QUEUE;
+		RTE_LOG(DEBUG, PMD, "Multi-queue support for %d queues\n",
+			RTE_PMD_TAP_MAX_QUEUES);
+	}
+
+	/* Set the TUN/TAP configuration and get the name if needed */
+	if (ioctl(fd, TUNSETIFF, (void *)&ifr) < 0) {
+		RTE_LOG(ERR, PMD, "Unable to set TUNSETIFF for %s\n",
+			ifr.ifr_name);
+		perror("TUNSETIFF");
+		goto error;
+	}
+
+	/* Always set the file descriptor to non-blocking */
+	if (fcntl(fd, F_SETFL, O_NONBLOCK) < 0) {
+		RTE_LOG(ERR, PMD, "Unable to set to nonblocking\n");
+		perror("F_SETFL, NONBLOCK");
+		goto error;
+	}
+
+	/* If the interface ended up with a different name, hand it back */
+	if (name && strcmp(name, ifr.ifr_name))
+		snprintf(name, RTE_ETH_NAME_MAX_LEN - 1, "%s", ifr.ifr_name);
+
+	return fd;
+
+error:
+	/* 0 is a valid descriptor, so only a negative fd means "not open" */
+	if (fd >= 0)
+		close(fd);
+	return -1;
+}
+
+/* RX burst callback.
+ *
+ * Reads up to nb_pkts frames from the queue's TAP descriptor, one frame per
+ * freshly allocated mbuf, and stores them in bufs. The burst ends early when
+ * no mbuf can be allocated or when read() returns nothing. Returns the
+ * number of mbufs placed in bufs.
+ */
+static uint16_t
+pmd_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
+{
+	struct rx_queue *rxq = queue;
+	unsigned long bytes = 0;
+	uint16_t count = 0;
+
+	while (count < nb_pkts) {
+		struct rte_mbuf *m;
+		int nread;
+
+		m = rte_pktmbuf_alloc(rxq->mp);
+		if (unlikely(!m)) {
+			RTE_LOG(WARNING, PMD, "Unable to allocate mbuf\n");
+			break;
+		}
+
+		/* One read() yields at most one frame */
+		nread = read(rxq->fd, rte_pktmbuf_mtod(m, char *),
+			     rte_pktmbuf_tailroom(m));
+		if (nread <= 0) {
+			/* Nothing pending (or an error): give the mbuf back */
+			rte_pktmbuf_free(m);
+			break;
+		}
+
+		m->data_len = nread;
+		m->pkt_len = nread;
+		m->port = rxq->in_port;
+
+		/* hand the frame to the caller and account for it */
+		bufs[count] = m;
+		count++;
+		bytes += m->pkt_len;
+	}
+
+	rxq->stats.ipackets += count;
+	rxq->stats.ibytes += bytes;
+
+	return count;
+}
+
+/* TX burst callback.
+ *
+ * Makes at most nb_pkts attempts to write a frame to the queue's TAP
+ * descriptor, polling for POLLOUT before each write. Every mbuf that was
+ * written is freed; packets that were not sent are counted in the queue's
+ * error counter. Returns the number of packets written.
+ */
+static uint16_t
+pmd_tx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
+{
+	struct tx_queue *txq = queue;
+	struct rte_mbuf *pkt;
+	struct pollfd pfd;
+	unsigned long bytes = 0;
+	uint16_t sent = 0;
+	int attempt;
+
+	if (unlikely(nb_pkts == 0))
+		return 0;
+
+	pfd.fd = txq->fd;
+	pfd.events = POLLOUT;
+
+	for (attempt = 0; attempt < nb_pkts; attempt++) {
+		/* zero timeout: never block the caller */
+		if (poll(&pfd, 1, 0) <= 0)
+			break;
+
+		/* not writable on this attempt, try again */
+		if (!(pfd.revents & POLLOUT))
+			continue;
+
+		/* write the next unsent frame */
+		pkt = bufs[sent];
+		if (write(pfd.fd, rte_pktmbuf_mtod(pkt, void*),
+			  rte_pktmbuf_pkt_len(pkt)) <= 0)
+			break;
+
+		sent++;
+		bytes += pkt->pkt_len;
+		rte_pktmbuf_free(pkt);
+	}
+
+	txq->stats.opackets += sent;
+	txq->stats.errs += nb_pkts - sent;
+	txq->stats.obytes += bytes;
+
+	return sent;
+}
+
+/* Start callback: the only action is to mark the link as up. */
+static int
+tap_dev_start(struct rte_eth_dev *dev)
+{
+	struct rte_eth_link *link = &dev->data->dev_link;
+
+	link->link_status = ETH_LINK_UP;
+
+	return 0;
+}
+
+/* This function gets called when the current port gets stopped.
+ *
+ * Closes every descriptor recorded in the port's fds[] table and marks the
+ * link as down. Each closed descriptor is also erased from fds[] and from
+ * the queue structures that share it, so that it can neither be closed a
+ * second time nor used for I/O after the kernel has recycled its number.
+ */
+static void
+tap_dev_stop(struct rte_eth_dev *dev)
+{
+	int i;
+	struct pmd_internals *internals = dev->data->dev_private;
+
+	for (i = 0; i < internals->nb_queues; i++) {
+		int fd = internals->fds[i];
+
+		if (fd == -1)
+			continue;
+
+		close(fd);
+
+		/* RX and TX of a queue normally share this descriptor */
+		if (internals->rxq[i].fd == fd)
+			internals->rxq[i].fd = -1;
+		if (internals->txq[i].fd == fd)
+			internals->txq[i].fd = -1;
+		internals->fds[i] = -1;
+	}
+
+	dev->data->dev_link.link_status = ETH_LINK_DOWN;
+}
+
+/* Configure callback: no configuration is applied; always returns 0. */
+static int
+tap_dev_configure(struct rte_eth_dev *dev __rte_unused)
+{
+	return 0;
+}
+
+/* Fill dev_info with the fixed capabilities of a TAP port: one MAC address,
+ * VLAN-sized maximum frame and as many RX/TX queues as the port was created
+ * with.
+ */
+static void
+tap_dev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
+{
+	const struct pmd_internals *pmd = dev->data->dev_private;
+
+	/* identification */
+	dev_info->pci_dev = NULL;
+	dev_info->driver_name = drivername;
+	dev_info->if_index = pmd->if_index;
+
+	/* limits */
+	dev_info->max_mac_addrs = 1;
+	dev_info->min_rx_bufsize = 0;
+	dev_info->max_rx_pktlen = (uint32_t)ETHER_MAX_VLAN_FRAME_LEN;
+	dev_info->max_rx_queues = pmd->nb_queues;
+	dev_info->max_tx_queues = pmd->nb_queues;
+}
+
+/* Copy the per-queue counters into tap_stats and fill in the port totals.
+ *
+ * Only the first RTE_ETHDEV_QUEUE_STAT_CNTRS queues are reported, and the
+ * totals are the sums over those reported queues.
+ */
+static void
+tap_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *tap_stats)
+{
+	const struct pmd_internals *pmd = dev->data->dev_private;
+	unsigned long in_pkts = 0, in_bytes = 0;
+	unsigned long out_pkts = 0, out_bytes = 0, out_errs = 0;
+	unsigned int q, nb_q;
+
+	nb_q = pmd->nb_queues;
+	if (nb_q > RTE_ETHDEV_QUEUE_STAT_CNTRS)
+		nb_q = RTE_ETHDEV_QUEUE_STAT_CNTRS;
+
+	for (q = 0; q < nb_q; q++) {
+		const struct pkt_stats *rx = &pmd->rxq[q].stats;
+		const struct pkt_stats *tx = &pmd->txq[q].stats;
+
+		/* receive side */
+		tap_stats->q_ipackets[q] = rx->ipackets;
+		tap_stats->q_ibytes[q] = rx->ibytes;
+		in_pkts += tap_stats->q_ipackets[q];
+		in_bytes += tap_stats->q_ibytes[q];
+
+		/* transmit side */
+		tap_stats->q_opackets[q] = tx->opackets;
+		tap_stats->q_errors[q] = tx->errs;
+		tap_stats->q_obytes[q] = tx->obytes;
+		out_pkts += tap_stats->q_opackets[q];
+		out_errs += tap_stats->q_errors[q];
+		out_bytes += tap_stats->q_obytes[q];
+	}
+
+	tap_stats->ipackets = in_pkts;
+	tap_stats->ibytes = in_bytes;
+	tap_stats->opackets = out_pkts;
+	tap_stats->oerrors = out_errs;
+	tap_stats->obytes = out_bytes;
+}
+
+/* Zero the RX packet/byte counters and the TX packet/byte/error counters of
+ * every queue of the port.
+ */
+static void
+tap_stats_reset(struct rte_eth_dev *dev)
+{
+	struct pmd_internals *pmd = dev->data->dev_private;
+	int q;
+
+	for (q = 0; q < pmd->nb_queues; q++) {
+		struct pkt_stats *rx = &pmd->rxq[q].stats;
+		struct pkt_stats *tx = &pmd->txq[q].stats;
+
+		rx->ipackets = 0;
+		rx->ibytes = 0;
+
+		tx->opackets = 0;
+		tx->errs = 0;
+		tx->obytes = 0;
+	}
+}
+
+/* Close callback: intentionally empty, nothing is released here. */
+static void
+tap_dev_close(struct rte_eth_dev *dev __rte_unused)
+{
+}
+
+/* Release an RX queue: close its descriptor, if it holds one, and mark the
+ * queue as having none.
+ */
+static void
+tap_rx_queue_release(void *queue)
+{
+	struct rx_queue *rxq = queue;
+
+	if (!rxq || !(rxq->fd > 0))
+		return;
+
+	close(rxq->fd);
+	rxq->fd = -1;
+}
+
+/* Release a TX queue: close its descriptor, if it holds one, and mark the
+ * queue as having none.
+ */
+static void
+tap_tx_queue_release(void *queue)
+{
+	struct tx_queue *txq = queue;
+
+	if (!txq || !(txq->fd > 0))
+		return;
+
+	close(txq->fd);
+	txq->fd = -1;
+}
+
+/* Link update callback: the link state is not re-read; always returns 0. */
+static int
+tap_link_update(struct rte_eth_dev *dev __rte_unused,
+		int wait_to_complete __rte_unused)
+{
+	return 0;
+}
+
+/* Bind queue qid of the port to a TAP descriptor.
+ *
+ * The RX and TX halves of a queue share one descriptor: whichever half
+ * already has one is reused, otherwise a new one is obtained from
+ * tun_alloc(). Both halves are then registered in the ethdev queue tables.
+ *
+ * Returns the descriptor on success, -1 if tun_alloc() failed.
+ */
+static int
+tap_setup_queue(struct rte_eth_dev *dev,
+		struct pmd_internals *internals,
+		uint16_t qid)
+{
+	struct rx_queue *rxq = &internals->rxq[qid];
+	struct tx_queue *txq = &internals->txq[qid];
+	int fd;
+
+	/* prefer the RX descriptor, then the TX one */
+	fd = (rxq->fd < 0) ? txq->fd : rxq->fd;
+
+	if (fd < 0) {
+		RTE_LOG(INFO, PMD, "Add queue to TAP %s for qid %d\n",
+			dev->data->name, qid);
+		fd = tun_alloc(dev->data->name);
+		if (fd < 0) {
+			RTE_LOG(ERR, PMD, "tun_alloc(%s) failed\n",
+				dev->data->name);
+			return -1;
+		}
+	}
+
+	rxq->fd = fd;
+	txq->fd = fd;
+
+	dev->data->rx_queues[qid] = rxq;
+	dev->data->tx_queues[qid] = txq;
+
+	return fd;
+}
+
+/* RX queue setup callback.
+ *
+ * Records the mempool and port id in the queue, checks that an mbuf from mp
+ * can hold ETH_FRAME_LEN bytes after the headroom, and binds the queue to a
+ * TAP descriptor, which is also stored in the port's fds[] table.
+ *
+ * Returns 0 on success, -ENOMEM when the mbufs are too small and -1 on a
+ * bad queue id, missing mempool or descriptor allocation failure.
+ */
+static int
+tap_rx_queue_setup(struct rte_eth_dev *dev,
+		   uint16_t rx_queue_id,
+		   uint16_t nb_rx_desc __rte_unused,
+		   unsigned int socket_id __rte_unused,
+		   const struct rte_eth_rxconf *rx_conf __rte_unused,
+		   struct rte_mempool *mp)
+{
+	struct pmd_internals *internals = dev->data->dev_private;
+	struct rx_queue *rxq;
+	uint16_t room;
+	int fd;
+
+	if (!mp || (rx_queue_id >= internals->nb_queues)) {
+		RTE_LOG(ERR, PMD, "nb_queues %d mp %p\n",
+			internals->nb_queues, mp);
+		return -1;
+	}
+
+	rxq = &internals->rxq[rx_queue_id];
+	rxq->mp = mp;
+	rxq->in_port = dev->data->port_id;
+
+	/* Data space left in an mbuf once the headroom is taken out */
+	room = (uint16_t)(rte_pktmbuf_data_room_size(mp) -
+			  RTE_PKTMBUF_HEADROOM);
+
+	if (room < ETH_FRAME_LEN) {
+		RTE_LOG(ERR, PMD,
+			"%s: %d bytes will not fit in mbuf (%d bytes)\n",
+			dev->data->name, ETH_FRAME_LEN, room);
+		return -ENOMEM;
+	}
+
+	fd = tap_setup_queue(dev, internals, rx_queue_id);
+	if (fd == -1)
+		return -1;
+
+	internals->fds[rx_queue_id] = fd;
+	RTE_LOG(INFO, PMD, "RX TAP device name %s, qid %d on fd %d\n",
+		dev->data->name, rx_queue_id, rxq->fd);
+
+	return 0;
+}
+
+/* TX queue setup callback.
+ *
+ * Binds the queue to a TAP descriptor (shared with the RX half of the same
+ * queue id) and records that descriptor in the port's fds[] table, the same
+ * way the RX setup path does, so that a descriptor allocated through the TX
+ * path is also closed when the port is stopped or removed.
+ *
+ * Returns 0 on success, -1 on a bad queue id or descriptor allocation
+ * failure.
+ */
+static int
+tap_tx_queue_setup(struct rte_eth_dev *dev,
+		   uint16_t tx_queue_id,
+		   uint16_t nb_tx_desc __rte_unused,
+		   unsigned int socket_id __rte_unused,
+		   const struct rte_eth_txconf *tx_conf __rte_unused)
+{
+	struct pmd_internals *internals = dev->data->dev_private;
+	int ret;
+
+	if (tx_queue_id >= internals->nb_queues)
+		return -1;
+
+	ret = tap_setup_queue(dev, internals, tx_queue_id);
+	if (ret == -1)
+		return -1;
+
+	/* Without this the descriptor would never be closed when only the
+	 * TX half of the queue has been set up.
+	 */
+	internals->fds[tx_queue_id] = ret;
+
+	RTE_LOG(INFO, PMD, "TX TAP device name %s, qid %d on fd %d\n",
+		dev->data->name, tx_queue_id, internals->txq[tx_queue_id].fd);
+
+	return 0;
+}
+
+/* ethdev callback table installed on every TAP port at creation time */
+static const struct eth_dev_ops ops = {
+	.dev_start              = tap_dev_start,
+	.dev_stop               = tap_dev_stop,
+	.dev_close              = tap_dev_close,
+	.dev_configure          = tap_dev_configure,
+	.dev_infos_get          = tap_dev_info,
+	.rx_queue_setup         = tap_rx_queue_setup,
+	.tx_queue_setup         = tap_tx_queue_setup,
+	.rx_queue_release       = tap_rx_queue_release,
+	.tx_queue_release       = tap_tx_queue_release,
+	.link_update            = tap_link_update,
+	.stats_get              = tap_stats_get,
+	.stats_reset            = tap_stats_reset,
+};
+
+/* Assign the MAC addresses of a new TAP port.
+ *
+ * The host side of the interface is given the address "Tap-" followed by
+ * the port id and NUMA node; the DPDK side address returned in addr is
+ * "dnet" followed by the same two bytes, so the two ends always differ.
+ *
+ * Returns 0 on success, -1 on bad arguments or when an ioctl fails.
+ */
+static int
+pmd_mac_address(int fd, struct rte_eth_dev *dev, struct ether_addr *addr)
+{
+	struct ifreq ifr;
+	char *mac = ifr.ifr_hwaddr.sa_data;
+
+	if (!dev || !addr || (fd <= 0))
+		return -1;
+
+	memset(&ifr, 0, sizeof(ifr));
+
+	/* Fetch the current hardware address (fills in the address family) */
+	if (ioctl(fd, SIOCGIFHWADDR, &ifr) == -1) {
+		RTE_LOG(ERR, PMD, "ioctl failed (SIOCGIFHWADDR) (%s)\n",
+			ifr.ifr_name);
+		return -1;
+	}
+
+	/* Host based MAC address: "Tap-" + port id + numa node */
+	mac[0] = 'T';
+	mac[1] = 'a';
+	mac[2] = 'p';
+	mac[3] = '-';
+	mac[4] = dev->data->port_id;
+	mac[5] = dev->data->numa_node;
+	if (ioctl(fd, SIOCSIFHWADDR, &ifr) == -1) {
+		RTE_LOG(ERR, PMD, "%s: ioctl failed (SIOCSIFHWADDR) (%s)\n",
+			dev->data->name, ifr.ifr_name);
+		return -1;
+	}
+
+	/* Local application MAC address: must not be the same as the host
+	 * based one, so use "dnet" + port id + numa node.
+	 */
+	mac[0] = 'd';
+	mac[1] = 'n';
+	mac[2] = 'e';
+	mac[3] = 't';
+	mac[4] = dev->data->port_id;
+	mac[5] = dev->data->numa_node;
+	rte_memcpy(addr, mac, ETH_ALEN);
+
+	return 0;
+}
+
+/* Allocate and initialise one TAP ethdev port.
+ *
+ * name     - vdev name given by the caller; stored as the ethdev data name.
+ * tap_name - host interface name; used to tag the allocations, to allocate
+ *            the ethdev entry and as the name passed to tun_alloc(), which
+ *            may overwrite it.
+ *
+ * The first TAP descriptor is opened here and installed as queue 0; the
+ * other queue slots are marked unused (-1).
+ *
+ * Returns 0 on success and -EINVAL on any failure, in which case the
+ * memory, the ethdev port and the TAP descriptor acquired so far are
+ * released.
+ */
+static int
+eth_dev_tap_create(const char *name, char *tap_name)
+{
+	int numa_node = rte_socket_id();
+	struct rte_eth_dev *dev = NULL;
+	struct pmd_internals *pmd = NULL;
+	struct rte_eth_dev_data *data = NULL;
+	int i, fd = -1;
+
+	RTE_LOG(INFO, PMD,
+		"%s: Create TAP Ethernet device with %d queues on numa %u\n",
+		 name, RTE_PMD_TAP_MAX_QUEUES, rte_socket_id());
+
+	data = rte_zmalloc_socket(tap_name, sizeof(*data), 0, numa_node);
+	if (!data) {
+		RTE_LOG(INFO, PMD, "Failed to allocate data\n");
+		goto error_exit;
+	}
+
+	pmd = rte_zmalloc_socket(tap_name, sizeof(*pmd), 0, numa_node);
+	if (!pmd) {
+		RTE_LOG(INFO, PMD, "Unable to allocate internal struct\n");
+		goto error_exit;
+	}
+
+	/* The ethdev entry is allocated under tap_name; the data name is
+	 * switched to name further down.
+	 */
+	dev = rte_eth_dev_allocate(tap_name);
+	if (!dev) {
+		RTE_LOG(INFO, PMD, "Unable to allocate device struct\n");
+		goto error_exit;
+	}
+
+	snprintf(pmd->name, sizeof(pmd->name), "%s", tap_name);
+
+	pmd->nb_queues = RTE_PMD_TAP_MAX_QUEUES;
+
+	/* Setup some default values */
+	data->dev_private = pmd;
+	data->port_id = dev->data->port_id;
+	data->dev_flags = RTE_ETH_DEV_DETACHABLE;
+	data->kdrv = RTE_KDRV_NONE;
+	data->drv_name = drivername;
+	data->numa_node = numa_node;
+
+	data->dev_link = pmd_link;
+	data->mac_addrs = &pmd->eth_addr;
+	data->nb_rx_queues = pmd->nb_queues;
+	data->nb_tx_queues = pmd->nb_queues;
+
+	/* Replace the data block handed out by the ethdev layer with ours */
+	dev->data = data;
+	dev->dev_ops = &ops;
+	dev->driver = NULL;
+	dev->rx_pkt_burst = pmd_rx_burst;
+	dev->tx_pkt_burst = pmd_tx_burst;
+	snprintf(dev->data->name, sizeof(dev->data->name), "%s", name);
+
+	/* Create the first Tap device */
+	fd = tun_alloc(tap_name);
+	if (fd < 0) {
+		RTE_LOG(INFO, PMD, "tun_alloc() failed\n");
+		goto error_exit;
+	}
+
+	/* Presetup the fds to -1 as being not working */
+	for (i = 0; i < RTE_PMD_TAP_MAX_QUEUES; i++) {
+		pmd->fds[i] = -1;
+		pmd->rxq[i].fd = -1;
+		pmd->txq[i].fd = -1;
+	}
+
+	/* Take the TUN/TAP fd and place in the first location */
+	pmd->rxq[0].fd = fd;
+	pmd->txq[0].fd = fd;
+	pmd->fds[0] = fd;
+
+	if (pmd_mac_address(fd, dev, &pmd->eth_addr) < 0) {
+		RTE_LOG(INFO, PMD, "Unable to get MAC address\n");
+		goto error_exit;
+	}
+
+	return 0;
+
+error_exit:
+	RTE_PMD_DEBUG_TRACE("Unable to initialize %s\n", name);
+
+	/* The descriptor is only referenced from pmd, which is freed below,
+	 * so it has to be closed here or it leaks.
+	 */
+	if (fd >= 0)
+		close(fd);
+
+	rte_free(data);
+	rte_free(pmd);
+
+	rte_eth_dev_release_port(dev);
+
+	return -EINVAL;
+}
+
+/* kvargs handler for the "iface" key.
+ *
+ * Writes the requested interface name into the buffer passed as extra_args;
+ * without a value the default name built from the previous unit number is
+ * written instead. Always returns 0.
+ */
+static int
+set_interface_name(const char *key __rte_unused,
+		   const char *value,
+		   void *extra_args)
+{
+	char *name = (char *)extra_args;
+
+	if (!value) {
+		snprintf(name, RTE_ETH_NAME_MAX_LEN - 1, "%s%d",
+			 DEFAULT_TAP_NAME, (tap_unit - 1));
+		return 0;
+	}
+
+	snprintf(name, RTE_ETH_NAME_MAX_LEN - 1, "%s", value);
+
+	return 0;
+}
+
+/* kvargs handler for the "speed" key.
+ *
+ * Stores the numeric value of the argument in the int passed as extra_args,
+ * or 10G when no value was given. Always returns 0.
+ */
+static int
+set_interface_speed(const char *key __rte_unused,
+		    const char *value,
+		    void *extra_args)
+{
+	int *speed = (int *)extra_args;
+
+	if (value)
+		*speed = atoi(value);
+	else
+		*speed = ETH_SPEED_NUM_10G;
+
+	return 0;
+}
+
+/* Open a TAP interface device.
+ *
+ * Builds the default interface name ("dtap" + unit number), applies the
+ * optional "speed" and "iface" arguments found in params and creates the
+ * ethdev port.
+ *
+ * Returns 0 on success or a negative value on failure; on failure the unit
+ * number taken for the default name is given back.
+ */
+static int
+rte_pmd_tap_probe(const char *name, const char *params)
+{
+	int ret;
+	struct rte_kvargs *kvlist = NULL;
+	int speed;
+	char tap_name[RTE_ETH_NAME_MAX_LEN];
+
+	speed = ETH_SPEED_NUM_10G;
+	snprintf(tap_name, sizeof(tap_name), "%s%d",
+		 DEFAULT_TAP_NAME, tap_unit++);
+
+	RTE_LOG(INFO, PMD, "Initializing pmd_tap for %s as %s\n",
+		name, tap_name);
+
+	if (params && (params[0] != '\0')) {
+		RTE_LOG(INFO, PMD, "paramaters (%s)\n", params);
+
+		kvlist = rte_kvargs_parse(params, valid_arguments);
+		if (kvlist) {
+			if (rte_kvargs_count(kvlist, ETH_TAP_SPEED_ARG) == 1) {
+				ret = rte_kvargs_process(kvlist,
+							 ETH_TAP_SPEED_ARG,
+							 &set_interface_speed,
+							 &speed);
+				if (ret == -1)
+					goto leave;
+			}
+
+			if (rte_kvargs_count(kvlist, ETH_TAP_IFACE_ARG) == 1) {
+				ret = rte_kvargs_process(kvlist,
+							 ETH_TAP_IFACE_ARG,
+							 &set_interface_name,
+							 tap_name);
+				if (ret == -1)
+					goto leave;
+			}
+		}
+	}
+	/* The link template is copied into the port at creation time */
+	pmd_link.link_speed = speed;
+
+	ret = eth_dev_tap_create(name, tap_name);
+
+leave:
+	/* eth_dev_tap_create() fails with -EINVAL, not -1, so any negative
+	 * value has to be treated as a failure here.
+	 */
+	if (ret < 0) {
+		RTE_LOG(INFO, PMD, "Failed to create pmd for %s as %s\n",
+			name, tap_name);
+		tap_unit--;		/* Restore the unit number */
+	}
+	rte_kvargs_free(kvlist);
+
+	return ret;
+}
+
+/* Detach a TAP device.
+ *
+ * Looks the port up by name, closes every descriptor recorded in its fds[]
+ * table, frees the private and ethdev data and releases the port. An
+ * unknown name is not an error. Always returns 0.
+ */
+static int
+rte_pmd_tap_remove(const char *name)
+{
+	struct rte_eth_dev *eth_dev;
+	struct pmd_internals *pmd;
+	int q;
+
+	RTE_LOG(INFO, PMD, "Closing TUN/TAP Ethernet device on numa %u\n",
+		rte_socket_id());
+
+	/* locate the ethdev entry; nothing to do when there is none */
+	eth_dev = rte_eth_dev_allocated(name);
+	if (eth_dev == NULL)
+		return 0;
+
+	pmd = eth_dev->data->dev_private;
+	for (q = 0; q < pmd->nb_queues; q++) {
+		if (pmd->fds[q] == -1)
+			continue;
+		close(pmd->fds[q]);
+	}
+
+	rte_free(pmd);
+	rte_free(eth_dev->data);
+
+	rte_eth_dev_release_port(eth_dev);
+
+	return 0;
+}
+
+/* Virtual device driver entry points for the TAP PMD */
+static struct rte_vdev_driver pmd_tap_drv = {
+	.probe = rte_pmd_tap_probe,
+	.remove = rte_pmd_tap_remove,
+};
+/* Register the driver as "net_tap" together with its parameter help string */
+RTE_PMD_REGISTER_VDEV(net_tap, pmd_tap_drv);
+RTE_PMD_REGISTER_PARAM_STRING(net_tap, "iface=<string>,speed=N");
diff --git a/drivers/net/tap/rte_pmd_tap_version.map b/drivers/net/tap/rte_pmd_tap_version.map
new file mode 100644
index 0000000..61463bf
--- /dev/null
+++ b/drivers/net/tap/rte_pmd_tap_version.map
@@ -0,0 +1,4 @@
+DPDK_16.11 {
+
+	local: *;
+};
diff --git a/mk/rte.app.mk b/mk/rte.app.mk
index f75f0e2..02c32ae 100644
--- a/mk/rte.app.mk
+++ b/mk/rte.app.mk
@@ -124,6 +124,7 @@ _LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_PCAP)       += -lrte_pmd_pcap -lpcap
 _LDLIBS-$(CONFIG_RTE_LIBRTE_QEDE_PMD)       += -lrte_pmd_qede
 _LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_RING)       += -lrte_pmd_ring
 _LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_SZEDATA2)   += -lrte_pmd_szedata2 -lsze2
+_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_TAP)        += -lrte_pmd_tap
 _LDLIBS-$(CONFIG_RTE_LIBRTE_THUNDERX_NICVF_PMD) += -lrte_pmd_thunderx_nicvf -lm
 _LDLIBS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD)     += -lrte_pmd_virtio
 ifeq ($(CONFIG_RTE_LIBRTE_VHOST),y)
-- 
2.8.0.GIT

^ permalink raw reply related	[flat|nested] 59+ messages in thread

* Re: [PATCH v10] drivers/net:new TUN/TAP device PMD
  2016-12-07 19:38       ` [PATCH v10] drivers/net:new TUN/TAP device PMD Keith Wiles
@ 2016-12-07 20:15         ` Aws Ismail
  2016-12-09 18:16         ` Ferruh Yigit
  1 sibling, 0 replies; 59+ messages in thread
From: Aws Ismail @ 2016-12-07 20:15 UTC (permalink / raw)
  To: Keith Wiles; +Cc: DPDK, pmatilai, yuanhan.liu, Ferruh Yigit, john.mcnamara

On Wed, Dec 7, 2016 at 2:38 PM, Keith Wiles <keith.wiles@intel.com> wrote:
>
> Signed-off-by: Keith Wiles <keith.wiles@intel.com>

Tested-by: Aws Ismail <aismail@ciena.com>

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [PATCH v10] drivers/net:new TUN/TAP device PMD
  2016-12-07 19:38       ` [PATCH v10] drivers/net:new TUN/TAP device PMD Keith Wiles
  2016-12-07 20:15         ` Aws Ismail
@ 2016-12-09 18:16         ` Ferruh Yigit
  1 sibling, 0 replies; 59+ messages in thread
From: Ferruh Yigit @ 2016-12-09 18:16 UTC (permalink / raw)
  To: Keith Wiles, dev; +Cc: pmatilai, yuanhan.liu, john.mcnamara

On 12/7/2016 7:38 PM, Keith Wiles wrote:
> The PMD allows for DPDK and the host to communicate using a raw
> device interface on the host and in the DPDK application. The device
> created is a Tap device with a L2 packet header.
> 
> v10- Change the string name used to allow for multiple devices.
> v9 - Fix up the docs to use correct syntax
> v8 - Fix issue with tap_tx_queue_setup() not return zero on success.
> v7 - Reword the comment in common_base and fix the data->name issue
> v6 - fixed the checkpatch issues
> v5 - merge in changes from list review see related emails
>      fixed many minor edits
> v4 - merge with latest driver changes
> v3 - fix includes by removing ifdef for other type besides Linux
>      Fix the copyright notice in the Makefile
> v2 - merge all of the patches into one patch
>      Fix a typo on naming the tap device
>      Update the maintainers list
> 
> Signed-off-by: Keith Wiles <keith.wiles@intel.com>
> ---
>  MAINTAINERS                             |   5 +
>  config/common_base                      |   9 +
>  config/common_linuxapp                  |   1 +
>  doc/guides/nics/tap.rst                 | 136 ++++++

This is giving following warning [1], this file needs to be added into
doc/guides/nics/index.rst

[1]
doc/guides/nics/tap.rst:: WARNING: document isn't included in any toctree

>  drivers/net/Makefile                    |   1 +
>  drivers/net/tap/Makefile                |  57 +++
>  drivers/net/tap/rte_eth_tap.c           | 765 ++++++++++++++++++++++++++++++++
>  drivers/net/tap/rte_pmd_tap_version.map |   4 +
>  mk/rte.app.mk                           |   1 +
>  9 files changed, 979 insertions(+)
>  create mode 100644 doc/guides/nics/tap.rst
>  create mode 100644 drivers/net/tap/Makefile
>  create mode 100644 drivers/net/tap/rte_eth_tap.c
>  create mode 100644 drivers/net/tap/rte_pmd_tap_version.map
> 
<...>

> diff --git a/drivers/net/tap/rte_pmd_tap_version.map b/drivers/net/tap/rte_pmd_tap_version.map
> new file mode 100644
> index 0000000..61463bf
> --- /dev/null
> +++ b/drivers/net/tap/rte_pmd_tap_version.map
> @@ -0,0 +1,4 @@
> +DPDK_16.11 {

17.02

> +
> +	local: *;
> +};

<...>

^ permalink raw reply	[flat|nested] 59+ messages in thread

* [PATCH v11] drivers/net:new TUN/TAP device PMD
  2016-10-13 22:03     ` [PATCH v9] " Keith Wiles
                         ` (3 preceding siblings ...)
  2016-12-07 19:38       ` [PATCH v10] drivers/net:new TUN/TAP device PMD Keith Wiles
@ 2016-12-09 19:05       ` Keith Wiles
  2016-12-12 12:39         ` Vasily Philipov
  2016-12-12 14:24       ` [PATCH v12] net/tap: new " Keith Wiles
  2016-12-12 14:38       ` Keith Wiles
  6 siblings, 1 reply; 59+ messages in thread
From: Keith Wiles @ 2016-12-09 19:05 UTC (permalink / raw)
  To: dev; +Cc: pmatilai, yuanhan.liu, ferruh.yigit, john.mcnamara

The PMD allows for DPDK and the host to communicate using a raw
device interface on the host and in the DPDK application. The device
created is a Tap device with a L2 packet header.

v11- Add the tap.rst to the nic/index.rst file
v10- Change the string name used to allow for multiple devices.
v9 - Fix up the docs to use correct syntax
v8 - Fix issue with tap_tx_queue_setup() not return zero on success.
v7 - Reword the comment in common_base and fix the data->name issue
v6 - fixed the checkpatch issues
v5 - merge in changes from list review see related emails
     fixed many minor edits
v4 - merge with latest driver changes
v3 - fix includes by removing ifdef for other type besides Linux
     Fix the copyright notice in the Makefile
v2 - merge all of the patches into one patch
     Fix a typo on naming the tap device
     Update the maintainers list

Signed-off-by: Keith Wiles <keith.wiles@intel.com>
---
 MAINTAINERS                             |   5 +
 config/common_base                      |   9 +
 config/common_linuxapp                  |   1 +
 doc/guides/nics/index.rst               |   1 +
 doc/guides/nics/tap.rst                 | 136 ++++++
 drivers/net/Makefile                    |   1 +
 drivers/net/tap/Makefile                |  57 +++
 drivers/net/tap/rte_eth_tap.c           | 765 ++++++++++++++++++++++++++++++++
 drivers/net/tap/rte_pmd_tap_version.map |   4 +
 mk/rte.app.mk                           |   1 +
 10 files changed, 980 insertions(+)
 create mode 100644 doc/guides/nics/tap.rst
 create mode 100644 drivers/net/tap/Makefile
 create mode 100644 drivers/net/tap/rte_eth_tap.c
 create mode 100644 drivers/net/tap/rte_pmd_tap_version.map

diff --git a/MAINTAINERS b/MAINTAINERS
index 26d9590..842fb6d 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -398,6 +398,11 @@ F: doc/guides/nics/pcap_ring.rst
 F: app/test/test_pmd_ring.c
 F: app/test/test_pmd_ring_perf.c
 
+Tap PMD
+M: Keith Wiles <keith.wiles@intel.com>
+F: drivers/net/tap
+F: doc/guides/nics/tap.rst
+
 Null Networking PMD
 M: Tetsuya Mukawa <mtetsuyah@gmail.com>
 F: drivers/net/null/
diff --git a/config/common_base b/config/common_base
index 652a839..eb51cdb 100644
--- a/config/common_base
+++ b/config/common_base
@@ -590,3 +590,12 @@ CONFIG_RTE_APP_TEST_RESOURCE_TAR=n
 CONFIG_RTE_TEST_PMD=y
 CONFIG_RTE_TEST_PMD_RECORD_CORE_CYCLES=n
 CONFIG_RTE_TEST_PMD_RECORD_BURST_STATS=n
+
+#
+# Compile the TAP PMD
+#
+# The TAP PMD is currently only built for Linux and the
+# option is enabled by default in common_linuxapp file,
+# set to 'n' in the common_base file.
+#
+CONFIG_RTE_LIBRTE_PMD_TAP=n
diff --git a/config/common_linuxapp b/config/common_linuxapp
index 2483dfa..782b503 100644
--- a/config/common_linuxapp
+++ b/config/common_linuxapp
@@ -44,3 +44,4 @@ CONFIG_RTE_LIBRTE_PMD_VHOST=y
 CONFIG_RTE_LIBRTE_PMD_AF_PACKET=y
 CONFIG_RTE_LIBRTE_POWER=y
 CONFIG_RTE_VIRTIO_USER=y
+CONFIG_RTE_LIBRTE_PMD_TAP=y
diff --git a/doc/guides/nics/index.rst b/doc/guides/nics/index.rst
index 92d56a5..af92529 100644
--- a/doc/guides/nics/index.rst
+++ b/doc/guides/nics/index.rst
@@ -51,6 +51,7 @@ Network Interface Controller Drivers
     nfp
     qede
     szedata2
+    tap
     thunderx
     virtio
     vhost
diff --git a/doc/guides/nics/tap.rst b/doc/guides/nics/tap.rst
new file mode 100644
index 0000000..622b9e7
--- /dev/null
+++ b/doc/guides/nics/tap.rst
@@ -0,0 +1,136 @@
+..  BSD LICENSE
+    Copyright(c) 2016 Intel Corporation. All rights reserved.
+    All rights reserved.
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions
+    are met:
+
+    * Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+    notice, this list of conditions and the following disclaimer in
+    the documentation and/or other materials provided with the
+    distribution.
+    * Neither the name of Intel Corporation nor the names of its
+    contributors may be used to endorse or promote products derived
+    from this software without specific prior written permission.
+
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+Tun/Tap Poll Mode Driver
+========================
+
+The ``rte_eth_tap.c`` PMD creates a device using TUN/TAP interfaces on the
+local host. The PMD allows for DPDK and the host to communicate using a raw
+device interface on the host and in the DPDK application.
+
+The device created is a TAP device, which sends/receives packets in a raw
+format with an L2 header. The TAP PMD is used for connectivity to the
+local host through a TAP interface. When the TAP PMD is initialized it will
+create a number of tap devices in the host, visible via the ``ifconfig -a`` or
+``ip`` command. These commands can be used to configure and query the virtual
+device.
+
+These TAP interfaces can be used with Wireshark, tcpdump or Pktgen-DPDK,
+and can also serve as a network connection to the DPDK
+application. The way to enable one or more interfaces is to use the
+``--vdev=net_tap`` option on the DPDK application command line. Each
+``--vdev=net_tap`` option given will create an interface named dtap0, dtap1,
+and so on.
+
+The interface name can be changed with the ``iface`` argument, for example::
+
+   --vdev=net_tap,iface=foo0 --vdev=net_tap,iface=foo1, ...
+
+Also the speed of the interface can be changed from 10G to whatever number
+needed, but the interface does not enforce that speed, for example::
+
+   --vdev=net_tap,iface=foo0,speed=25000
+
+After the DPDK application is started you can send and receive packets on the
+interface using the standard rx_burst/tx_burst APIs in DPDK. From the host
+point of view you can use any host tool like tcpdump, Wireshark, ping, Pktgen
+and others to communicate with the DPDK application. The DPDK application may
+not understand network protocols like IPv4/6, UDP or TCP unless the
+application has been written to understand these protocols.
+
+If you need the interface to act as a real network interface, meaning it is
+up and has a valid IP address, you can do this with the following commands::
+
+   sudo ip link set dtap0 up; sudo ip addr add 192.168.0.250/24 dev dtap0
+   sudo ip link set dtap1 up; sudo ip addr add 192.168.1.250/24 dev dtap1
+
+Please change the IP addresses as you see fit.
+
+If routing is enabled on the host you can also communicate with the DPDK App
+over the internet via a standard socket layer application as long as you
+account for the protocol handling in the application.
+
+If you have a Network Stack in your DPDK application or something like it you
+can utilize that stack to handle the network protocols. Plus you would be able
+to address the interface using an IP address assigned to the internal
+interface.
+
+Example
+-------
+
+The following is a simple example of using the TUN/TAP PMD with the Pktgen
+packet generator. It requires that the ``socat`` utility is installed on the
+test system.
+
+Build DPDK, then pull down Pktgen and build pktgen using the DPDK SDK/Target
+used to build the dpdk you pulled down.
+
+Run pktgen from the pktgen directory in a terminal with a commandline like the
+following::
+
+    sudo ./app/app/x86_64-native-linuxapp-gcc/app/pktgen -l 1-5 -n 4        \
+     --proc-type auto --log-level 8 --socket-mem 512,512 --file-prefix pg   \
+     --vdev=net_tap --vdev=net_tap -b 05:00.0 -b 05:00.1                    \
+     -b 04:00.0 -b 04:00.1 -b 04:00.2 -b 04:00.3                            \
+     -b 81:00.0 -b 81:00.1 -b 81:00.2 -b 81:00.3                            \
+     -b 82:00.0 -b 83:00.0 -- -T -P -m [2:3].0 -m [4:5].1                   \
+     -f themes/black-yellow.theme
+
+.. Note::
+
+   Change the ``-b`` options to blacklist all of your physical ports. The
+   command line above is all one line.
+
+   Also, ``-f themes/black-yellow.theme`` is optional if the default colors
+   work on your system configuration. See the Pktgen docs for more
+   information.
+
+Verify with the ``ifconfig -a`` command in a different xterm window; the
+``dtap0`` and ``dtap1`` interfaces should have been created.
+
+Next set the links for the two interfaces to up via the commands below::
+
+    sudo ip link set dtap0 up; sudo ip addr add 192.168.0.250/24 dev dtap0
+    sudo ip link set dtap1 up; sudo ip addr add 192.168.1.250/24 dev dtap1
+
+Then use socat to create a loopback for the two interfaces::
+
+    sudo socat interface:dtap0 interface:dtap1
+
+Then on the Pktgen command line interface you can start sending packets using
+the commands ``start 0`` and ``start 1`` or you can start both at the same
+time with ``start all``. The command ``str`` is an alias for ``start all`` and
+``stp`` is an alias for ``stop all``.
+
+While running you should see the 64 byte counters increasing to verify the
+traffic is being looped back. You can use ``set all size XXX`` to change the
+size of the packets after you stop the traffic. Use the pktgen ``help``
+command to see a list of all commands. You can also use the ``-f`` option to
+load commands at startup.
diff --git a/drivers/net/Makefile b/drivers/net/Makefile
index bc93230..e366a85 100644
--- a/drivers/net/Makefile
+++ b/drivers/net/Makefile
@@ -51,6 +51,7 @@ DIRS-$(CONFIG_RTE_LIBRTE_PMD_PCAP) += pcap
 DIRS-$(CONFIG_RTE_LIBRTE_QEDE_PMD) += qede
 DIRS-$(CONFIG_RTE_LIBRTE_PMD_RING) += ring
 DIRS-$(CONFIG_RTE_LIBRTE_PMD_SZEDATA2) += szedata2
+DIRS-$(CONFIG_RTE_LIBRTE_PMD_TAP) += tap
 DIRS-$(CONFIG_RTE_LIBRTE_THUNDERX_NICVF_PMD) += thunderx
 DIRS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio
 DIRS-$(CONFIG_RTE_LIBRTE_VMXNET3_PMD) += vmxnet3
diff --git a/drivers/net/tap/Makefile b/drivers/net/tap/Makefile
new file mode 100644
index 0000000..e18f30c
--- /dev/null
+++ b/drivers/net/tap/Makefile
@@ -0,0 +1,57 @@
+#   BSD LICENSE
+#
+#   Copyright(c) 2016 Intel Corporation. All rights reserved.
+#
+#   Redistribution and use in source and binary forms, with or without
+#   modification, are permitted provided that the following conditions
+#   are met:
+#
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice, this list of conditions and the following disclaimer in
+#       the documentation and/or other materials provided with the
+#       distribution.
+#     * Neither the name of Intel Corporation nor the names of its
+#       contributors may be used to endorse or promote products derived
+#       from this software without specific prior written permission.
+#
+#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+#   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+#   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+#   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+#   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+#
+# library name
+#
+LIB = librte_pmd_tap.a
+
+EXPORT_MAP := rte_pmd_tap_version.map
+
+LIBABIVER := 1
+
+CFLAGS += -O3
+CFLAGS += $(WERROR_FLAGS)
+
+#
+# all sources are stored in SRCS-y
+#
+SRCS-$(CONFIG_RTE_LIBRTE_PMD_TAP) += rte_eth_tap.c
+
+# this lib depends upon:
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_TAP) += lib/librte_eal
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_TAP) += lib/librte_mbuf
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_TAP) += lib/librte_mempool
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_TAP) += lib/librte_ether
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_TAP) += lib/librte_kvargs
+
+include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/drivers/net/tap/rte_eth_tap.c b/drivers/net/tap/rte_eth_tap.c
new file mode 100644
index 0000000..d5e2fc3
--- /dev/null
+++ b/drivers/net/tap/rte_eth_tap.c
@@ -0,0 +1,765 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2016 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <rte_mbuf.h>
+#include <rte_ethdev.h>
+#include <rte_malloc.h>
+#include <rte_vdev.h>
+#include <rte_kvargs.h>
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/socket.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <unistd.h>
+#include <poll.h>
+#include <arpa/inet.h>
+#include <linux/if.h>
+#include <linux/if_tun.h>
+#include <linux/if_ether.h>
+#include <fcntl.h>
+
+/* Linux based path to the TUN device */
+#define TUN_TAP_DEV_PATH        "/dev/net/tun"
+#define DEFAULT_TAP_NAME        "dtap"
+
+#define ETH_TAP_IFACE_ARG       "iface"
+#define ETH_TAP_SPEED_ARG       "speed"
+
+#define RTE_PMD_TAP_MAX_QUEUES	16
+
+static const char *valid_arguments[] = {	/* keys handed to rte_kvargs_parse() by the probe routine */
+	ETH_TAP_IFACE_ARG,
+	ETH_TAP_SPEED_ARG,
+	NULL
+};
+
+static const char *drivername = "Tap PMD";
+static int tap_unit;		/* next unit number appended to DEFAULT_TAP_NAME */
+
+static struct rte_eth_link pmd_link = {	/* link template copied into each new port; link_speed is rewritten on every probe */
+	.link_speed = ETH_SPEED_NUM_10G,
+	.link_duplex = ETH_LINK_FULL_DUPLEX,
+	.link_status = ETH_LINK_DOWN,
+	.link_autoneg = ETH_LINK_SPEED_AUTONEG
+};
+
+struct pkt_stats {			/* Software counters embedded in every RX and TX queue */
+	uint64_t opackets;		/* Number of output packets */
+	uint64_t ipackets;		/* Number of input packets */
+	uint64_t obytes;		/* Number of bytes on output */
+	uint64_t ibytes;		/* Number of bytes on input */
+	uint64_t errs;			/* Number of error packets */
+};
+
+struct rx_queue {
+	struct rte_mempool *mp;		/* Mempool for RX packets */
+	uint16_t in_port;		/* Port ID */
+	int fd;				/* TAP file descriptor read by pmd_rx_burst(), -1 when unset */
+
+	struct pkt_stats stats;		/* Stats for this RX queue */
+};
+
+struct tx_queue {
+	int fd;				/* TAP file descriptor written by pmd_tx_burst(), -1 when unset */
+	struct pkt_stats stats;		/* Stats for this TX queue */
+};
+
+struct pmd_internals {			/* Per-port private data, reachable through dev->data->dev_private */
+	char name[RTE_ETH_NAME_MAX_LEN];	/* Internal Tap device name */
+	uint16_t nb_queues;		/* Number of queues supported */
+	struct ether_addr eth_addr;	/* Mac address of the device port */
+
+	int if_index;			/* IF_INDEX for the port */
+	int fds[RTE_PMD_TAP_MAX_QUEUES]; /* List of all file descriptors */
+
+	struct rx_queue rxq[RTE_PMD_TAP_MAX_QUEUES];	/* List of RX queues */
+	struct tx_queue txq[RTE_PMD_TAP_MAX_QUEUES];	/* List of TX queues */
+};
+
+/* Tun/Tap allocation routine
+ *
+ * Opens TUN_TAP_DEV_PATH, attaches the descriptor to a TAP interface
+ * (IFF_TAP | IFF_NO_PI) and switches it to non-blocking mode.
+ *
+ * name is the name of the interface to use; pass NULL or an empty string to
+ * take the host supplied name. When the kernel reports a different name it
+ * is written back into name, which therefore has to be a writable buffer of
+ * at least RTE_ETH_NAME_MAX_LEN bytes.
+ *
+ * Returns the open file descriptor, or -1 on failure.
+ */
+static int
+tun_alloc(char *name)
+{
+	struct ifreq ifr;
+	unsigned int features;
+	int fd;
+
+	memset(&ifr, 0, sizeof(struct ifreq));
+
+	ifr.ifr_flags = IFF_TAP | IFF_NO_PI;
+	/* snprintf(), unlike strncpy(), always NUL-terminates ifr_name */
+	if (name && name[0])
+		snprintf(ifr.ifr_name, IFNAMSIZ, "%s", name);
+
+	fd = open(TUN_TAP_DEV_PATH, O_RDWR);
+	if (fd < 0) {
+		RTE_LOG(ERR, PMD, "Unable to create TAP interface\n");
+		goto error;
+	}
+
+	/* Grab the TUN features to verify we can work */
+	if (ioctl(fd, TUNGETFEATURES, &features) < 0) {
+		RTE_LOG(ERR, PMD, "Unable to get TUN/TAP features\n");
+		goto error;
+	}
+	RTE_LOG(DEBUG, PMD, "TUN/TAP Features %08x\n", features);
+
+	if (!(features & IFF_MULTI_QUEUE) && (RTE_PMD_TAP_MAX_QUEUES > 1)) {
+		RTE_LOG(DEBUG, PMD, "TUN/TAP device only one queue\n");
+		goto error;
+	} else if ((features & IFF_ONE_QUEUE) &&
+			(RTE_PMD_TAP_MAX_QUEUES == 1)) {
+		ifr.ifr_flags |= IFF_ONE_QUEUE;
+		RTE_LOG(DEBUG, PMD, "Single queue only support\n");
+	} else {
+		ifr.ifr_flags |= IFF_MULTI_QUEUE;
+		RTE_LOG(DEBUG, PMD, "Multi-queue support for %d queues\n",
+			RTE_PMD_TAP_MAX_QUEUES);
+	}
+
+	/* Set the TUN/TAP configuration and get the name if needed */
+	if (ioctl(fd, TUNSETIFF, (void *)&ifr) < 0) {
+		RTE_LOG(ERR, PMD, "Unable to set TUNSETIFF for %s\n",
+			ifr.ifr_name);
+		perror("TUNSETIFF");
+		goto error;
+	}
+
+	/* Always set the file descriptor to non-blocking */
+	if (fcntl(fd, F_SETFL, O_NONBLOCK) < 0) {
+		RTE_LOG(ERR, PMD, "Unable to set to nonblocking\n");
+		perror("F_SETFL, NONBLOCK");
+		goto error;
+	}
+
+	/* If the kernel chose a different name, hand it back to the caller */
+	if (name && strcmp(name, ifr.ifr_name))
+		snprintf(name, RTE_ETH_NAME_MAX_LEN - 1, "%s", ifr.ifr_name);
+
+	return fd;
+
+error:
+	/* 0 is a valid descriptor, so only a negative fd means "not opened" */
+	if (fd >= 0)
+		close(fd);
+	return -1;
+}
+
+/* RX burst callback: fetch up to nb_pkts frames from the queue's TAP file
+ * descriptor, one read() per freshly allocated mbuf, and store them in bufs.
+ * Stops early when no mbuf can be allocated or read() returns nothing.
+ * Returns the number of frames placed in bufs.
+ */
+static uint16_t
+pmd_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
+{
+	struct rx_queue *rxq = queue;
+	unsigned long rx_bytes = 0;
+	uint16_t count = 0;
+
+	while (count < nb_pkts) {
+		struct rte_mbuf *m = rte_pktmbuf_alloc(rxq->mp);
+		int nread;
+
+		if (unlikely(m == NULL)) {
+			RTE_LOG(WARNING, PMD, "Unable to allocate mbuf\n");
+			break;
+		}
+
+		nread = read(rxq->fd, rte_pktmbuf_mtod(m, char *),
+			     rte_pktmbuf_tailroom(m));
+		if (nread <= 0) {
+			/* nothing read: give the spare mbuf back and stop */
+			rte_pktmbuf_free(m);
+			break;
+		}
+
+		m->data_len = nread;
+		m->pkt_len = nread;
+		m->port = rxq->in_port;
+
+		/* hand the frame to the caller and account for it */
+		bufs[count++] = m;
+		rx_bytes += m->pkt_len;
+	}
+
+	rxq->stats.ipackets += count;
+	rxq->stats.ibytes += rx_bytes;
+
+	return count;
+}
+
+/* Callback to handle sending packets from the tap interface
+ *
+ * Before each write the queue's fd is polled for POLLOUT with a zero
+ * timeout, so the call never blocks. The burst stops at the first poll()
+ * that reports nothing (or fails) or at the first write() that does not
+ * accept data. Every mbuf that was written is freed here; the remaining
+ * mbufs are not freed by this function.
+ *
+ * Returns the number of packets written (num_tx).
+ *
+ * NOTE(review): write() is given the first segment's data pointer together
+ * with the total pkt_len, so chained mbufs do not appear to be handled -
+ * confirm callers only pass single-segment packets.
+ */
+static uint16_t
+pmd_tx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
+{
+	struct rte_mbuf *mbuf;
+	struct tx_queue *txq = queue;
+	struct pollfd pfd;
+	uint16_t num_tx = 0;
+	unsigned long num_tx_bytes = 0;
+	int i, n;
+
+	if (unlikely(nb_pkts == 0))
+		return 0;
+
+	pfd.events = POLLOUT;
+	pfd.fd = txq->fd;
+	for (i = 0; i < nb_pkts; i++) {
+		n = poll(&pfd, 1, 0);
+
+		if (n <= 0)
+			break;
+
+		if (pfd.revents & POLLOUT) {
+			/* copy the tx frame data; bufs is indexed by num_tx
+			 * (packets sent so far), not by the loop counter
+			 */
+			mbuf = bufs[num_tx];
+			n = write(pfd.fd, rte_pktmbuf_mtod(mbuf, void*),
+				  rte_pktmbuf_pkt_len(mbuf));
+			if (n <= 0)
+				break;
+
+			num_tx++;
+			num_tx_bytes += mbuf->pkt_len;
+			rte_pktmbuf_free(mbuf);
+		}
+	}
+
+	txq->stats.opackets += num_tx;
+	txq->stats.errs += nb_pkts - num_tx;	/* every packet not written counts as an error */
+	txq->stats.obytes += num_tx_bytes;
+
+	return num_tx;
+}
+
+/* Start callback: there is no hardware to program, so simply report the
+ * link as up. Always returns 0.
+ */
+static int
+tap_dev_start(struct rte_eth_dev *dev)
+{
+	struct rte_eth_link *link = &dev->data->dev_link;
+
+	link->link_status = ETH_LINK_UP;
+	return 0;
+}
+
+/* This function gets called when the current port gets stopped.
+ *
+ * Closes every queue file descriptor recorded in internals->fds[] and marks
+ * the link down. A closed descriptor is reset to -1 in fds[] and in the
+ * rx/tx queue sharing it, so that the queue release callbacks and
+ * rte_pmd_tap_remove() do not close() the same descriptor number a second
+ * time (by then it may belong to an unrelated file).
+ */
+static void
+tap_dev_stop(struct rte_eth_dev *dev)
+{
+	int i;
+	struct pmd_internals *internals = dev->data->dev_private;
+
+	for (i = 0; i < internals->nb_queues; i++) {
+		if (internals->fds[i] == -1)
+			continue;
+
+		close(internals->fds[i]);
+
+		/* rxq[i] and txq[i] share the descriptor stored in fds[i] */
+		internals->fds[i] = -1;
+		internals->rxq[i].fd = -1;
+		internals->txq[i].fd = -1;
+	}
+
+	dev->data->dev_link.link_status = ETH_LINK_DOWN;
+}
+
+static int
+tap_dev_configure(struct rte_eth_dev *dev __rte_unused)
+{
+	return 0;	/* nothing to configure; always reports success */
+}
+
+/* Fill dev_info with the fixed capabilities of a TAP port: one MAC
+ * address, frames up to ETHER_MAX_VLAN_FRAME_LEN and nb_queues RX/TX
+ * queues. There is no PCI device behind the port.
+ */
+static void
+tap_dev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
+{
+	const struct pmd_internals *pmd = dev->data->dev_private;
+	const uint16_t nb_queues = pmd->nb_queues;
+
+	/* identification */
+	dev_info->pci_dev = NULL;
+	dev_info->driver_name = drivername;
+	dev_info->if_index = pmd->if_index;
+
+	/* limits */
+	dev_info->min_rx_bufsize = 0;
+	dev_info->max_rx_pktlen = (uint32_t)ETHER_MAX_VLAN_FRAME_LEN;
+	dev_info->max_mac_addrs = 1;
+	dev_info->max_rx_queues = nb_queues;
+	dev_info->max_tx_queues = nb_queues;
+}
+
+/* Copy the per-queue software counters into tap_stats and fill in the port
+ * totals. Only the first RTE_ETHDEV_QUEUE_STAT_CNTRS queues (or nb_queues,
+ * whichever is smaller) are reported and summed.
+ */
+static void
+tap_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *tap_stats)
+{
+	const struct pmd_internals *pmd = dev->data->dev_private;
+	unsigned long ipkts = 0, ibytes = 0;
+	unsigned long opkts = 0, obytes = 0, oerrs = 0;
+	unsigned int q, nb_q;
+
+	nb_q = pmd->nb_queues;
+	if (nb_q > RTE_ETHDEV_QUEUE_STAT_CNTRS)
+		nb_q = RTE_ETHDEV_QUEUE_STAT_CNTRS;
+
+	for (q = 0; q < nb_q; q++) {
+		const struct pkt_stats *rx = &pmd->rxq[q].stats;
+		const struct pkt_stats *tx = &pmd->txq[q].stats;
+
+		/* receive side */
+		tap_stats->q_ipackets[q] = rx->ipackets;
+		tap_stats->q_ibytes[q] = rx->ibytes;
+		ipkts += tap_stats->q_ipackets[q];
+		ibytes += tap_stats->q_ibytes[q];
+
+		/* transmit side */
+		tap_stats->q_opackets[q] = tx->opackets;
+		tap_stats->q_errors[q] = tx->errs;
+		tap_stats->q_obytes[q] = tx->obytes;
+		opkts += tap_stats->q_opackets[q];
+		oerrs += tap_stats->q_errors[q];
+		obytes += tap_stats->q_obytes[q];
+	}
+
+	tap_stats->ipackets = ipkts;
+	tap_stats->ibytes = ibytes;
+	tap_stats->opackets = opkts;
+	tap_stats->oerrors = oerrs;
+	tap_stats->obytes = obytes;
+}
+
+static void
+tap_stats_reset(struct rte_eth_dev *dev)
+{
+	int i;
+	struct pmd_internals *pmd = dev->data->dev_private;
+
+	for (i = 0; i < pmd->nb_queues; i++) {
+		pmd->rxq[i].stats.ipackets = 0;
+		pmd->rxq[i].stats.ibytes = 0;
+	}
+
+	for (i = 0; i < pmd->nb_queues; i++) {
+		pmd->txq[i].stats.opackets = 0;
+		pmd->txq[i].stats.errs = 0;
+		pmd->txq[i].stats.obytes = 0;
+	}
+}
+
+static void
+tap_dev_close(struct rte_eth_dev *dev __rte_unused)
+{	/* intentionally empty: this callback releases nothing */
+}
+
+/* Release callback for an RX queue: close the queue's file descriptor, if
+ * it holds one greater than zero, and mark it as unused (-1).
+ */
+static void
+tap_rx_queue_release(void *queue)
+{
+	struct rx_queue *rxq = queue;
+
+	if (rxq == NULL || rxq->fd <= 0)
+		return;
+
+	close(rxq->fd);
+	rxq->fd = -1;
+}
+
+/* Release callback for a TX queue: close the queue's file descriptor, if
+ * it holds one greater than zero, and mark it as unused (-1).
+ */
+static void
+tap_tx_queue_release(void *queue)
+{
+	struct tx_queue *txq = queue;
+
+	if (txq == NULL || txq->fd <= 0)
+		return;
+
+	close(txq->fd);
+	txq->fd = -1;
+}
+
+static int
+tap_link_update(struct rte_eth_dev *dev __rte_unused,
+		int wait_to_complete __rte_unused)
+{
+	return 0;	/* link state is only changed by tap_dev_start()/tap_dev_stop() */
+}
+
+/* Bind RX and TX queue qid of the port to one shared TAP file descriptor.
+ *
+ * An fd already stored in the RX queue is preferred, then one stored in the
+ * TX queue; only when neither holds one is a new descriptor obtained from
+ * tun_alloc(). Both queues are registered in dev->data and end up pointing
+ * at the same fd.
+ *
+ * Returns the file descriptor, or -1 when tun_alloc() fails.
+ */
+static int
+tap_setup_queue(struct rte_eth_dev *dev,
+		struct pmd_internals *internals,
+		uint16_t qid)
+{
+	struct rx_queue *rxq = &internals->rxq[qid];
+	struct tx_queue *txq = &internals->txq[qid];
+	int fd = rxq->fd;
+
+	if (fd < 0)
+		fd = txq->fd;
+
+	if (fd < 0) {
+		RTE_LOG(INFO, PMD, "Add queue to TAP %s for qid %d\n",
+			dev->data->name, qid);
+
+		fd = tun_alloc(dev->data->name);
+		if (fd < 0) {
+			RTE_LOG(ERR, PMD, "tun_alloc(%s) failed\n",
+				dev->data->name);
+			return -1;
+		}
+	}
+
+	dev->data->rx_queues[qid] = rxq;
+	dev->data->tx_queues[qid] = txq;
+
+	rxq->fd = fd;
+	txq->fd = fd;
+
+	return fd;
+}
+
+/* RX queue setup callback.
+ *
+ * Records the mempool and port id in the queue, checks that one mbuf of mp
+ * can hold ETH_FRAME_LEN bytes after the headroom, then attaches the queue
+ * to a TAP file descriptor through tap_setup_queue() and remembers that fd
+ * in internals->fds[].
+ *
+ * Returns 0 on success, -1 on a bad queue id, NULL mempool or fd failure,
+ * and -ENOMEM when the mbuf data room is too small.
+ */
+static int
+tap_rx_queue_setup(struct rte_eth_dev *dev,
+		   uint16_t rx_queue_id,
+		   uint16_t nb_rx_desc __rte_unused,
+		   unsigned int socket_id __rte_unused,
+		   const struct rte_eth_rxconf *rx_conf __rte_unused,
+		   struct rte_mempool *mp)
+{
+	struct pmd_internals *internals = dev->data->dev_private;
+	struct rx_queue *rxq;
+	uint16_t buf_size;
+	int fd;
+
+	if (!mp || (rx_queue_id >= internals->nb_queues)) {
+		RTE_LOG(ERR, PMD, "nb_queues %d mp %p\n",
+			internals->nb_queues, mp);
+		return -1;
+	}
+
+	rxq = &internals->rxq[rx_queue_id];
+	rxq->mp = mp;
+	rxq->in_port = dev->data->port_id;
+
+	/* Data space of one mbuf once the headroom has been set aside */
+	buf_size = (uint16_t)(rte_pktmbuf_data_room_size(mp) -
+				RTE_PKTMBUF_HEADROOM);
+
+	if (buf_size < ETH_FRAME_LEN) {
+		RTE_LOG(ERR, PMD,
+			"%s: %d bytes will not fit in mbuf (%d bytes)\n",
+			dev->data->name, ETH_FRAME_LEN, buf_size);
+		return -ENOMEM;
+	}
+
+	fd = tap_setup_queue(dev, internals, rx_queue_id);
+	if (fd == -1)
+		return -1;
+
+	internals->fds[rx_queue_id] = fd;
+	RTE_LOG(INFO, PMD, "RX TAP device name %s, qid %d on fd %d\n",
+		dev->data->name, rx_queue_id, rxq->fd);
+
+	return 0;
+}
+
+/* TX queue setup callback.
+ *
+ * Attaches TX queue tx_queue_id to a TAP file descriptor through
+ * tap_setup_queue() (shared with the RX queue of the same id) and records
+ * that descriptor in internals->fds[], the list walked by tap_dev_stop()
+ * and rte_pmd_tap_remove() when closing descriptors. Without this a queue
+ * id that is only set up for TX would never have its fd closed.
+ *
+ * Returns 0 on success, -1 on a bad queue id or when no fd can be obtained.
+ */
+static int
+tap_tx_queue_setup(struct rte_eth_dev *dev,
+		   uint16_t tx_queue_id,
+		   uint16_t nb_tx_desc __rte_unused,
+		   unsigned int socket_id __rte_unused,
+		   const struct rte_eth_txconf *tx_conf __rte_unused)
+{
+	struct pmd_internals *internals = dev->data->dev_private;
+	int ret;
+
+	if (tx_queue_id >= internals->nb_queues)
+		return -1;
+
+	ret = tap_setup_queue(dev, internals, tx_queue_id);
+	if (ret == -1)
+		return -1;
+
+	/* same value the RX setup stores: both queues share one fd */
+	internals->fds[tx_queue_id] = ret;
+
+	RTE_LOG(INFO, PMD, "TX TAP device name %s, qid %d on fd %d\n",
+		dev->data->name, tx_queue_id, internals->txq[tx_queue_id].fd);
+
+	return 0;
+}
+
+static const struct eth_dev_ops ops = {	/* callback table installed as dev->dev_ops on every TAP port */
+	.dev_start              = tap_dev_start,
+	.dev_stop               = tap_dev_stop,
+	.dev_close              = tap_dev_close,
+	.dev_configure          = tap_dev_configure,
+	.dev_infos_get          = tap_dev_info,
+	.rx_queue_setup         = tap_rx_queue_setup,
+	.tx_queue_setup         = tap_tx_queue_setup,
+	.rx_queue_release       = tap_rx_queue_release,
+	.tx_queue_release       = tap_tx_queue_release,
+	.link_update            = tap_link_update,
+	.stats_get              = tap_stats_get,
+	.stats_reset            = tap_stats_reset,
+};
+
+static int	/* 0 on success, -1 on invalid arguments or a failed ioctl */
+pmd_mac_address(int fd, struct rte_eth_dev *dev, struct ether_addr *addr)
+{
+	struct ifreq ifr;
+
+	if ((fd <= 0) || !dev || !addr)
+		return -1;
+
+	memset(&ifr, 0, sizeof(ifr));
+
+	if (ioctl(fd, SIOCGIFHWADDR, &ifr) == -1) {
+		RTE_LOG(ERR, PMD, "ioctl failed (SIOCGIFHWADDR) (%s)\n",
+			ifr.ifr_name);
+		return -1;
+	}
+
+	/* Set the host based MAC address to this special MAC format:
+	 * the bytes 'T' 'a' 'p' '-' followed by the port id and the numa
+	 * node. The ifreq filled in by SIOCGIFHWADDR above is reused, with
+	 * only the six address bytes overwritten.
+	 */
+	ifr.ifr_hwaddr.sa_data[0] = 'T';
+	ifr.ifr_hwaddr.sa_data[1] = 'a';
+	ifr.ifr_hwaddr.sa_data[2] = 'p';
+	ifr.ifr_hwaddr.sa_data[3] = '-';
+	ifr.ifr_hwaddr.sa_data[4] = dev->data->port_id;
+	ifr.ifr_hwaddr.sa_data[5] = dev->data->numa_node;
+	if (ioctl(fd, SIOCSIFHWADDR, &ifr) == -1) {
+		RTE_LOG(ERR, PMD, "%s: ioctl failed (SIOCSIFHWADDR) (%s)\n",
+			dev->data->name, ifr.ifr_name);
+		return -1;
+	}
+
+	/* Set the local application MAC address, needs to be different than
+	 * the host based MAC address. It is only copied into addr; no ioctl
+	 * is issued for it.
+	 */
+	ifr.ifr_hwaddr.sa_data[0] = 'd';
+	ifr.ifr_hwaddr.sa_data[1] = 'n';
+	ifr.ifr_hwaddr.sa_data[2] = 'e';
+	ifr.ifr_hwaddr.sa_data[3] = 't';
+	ifr.ifr_hwaddr.sa_data[4] = dev->data->port_id;
+	ifr.ifr_hwaddr.sa_data[5] = dev->data->numa_node;
+	rte_memcpy(addr, ifr.ifr_hwaddr.sa_data, ETH_ALEN);
+
+	return 0;
+}
+
+/* Create one TAP ethdev port.
+ *
+ * name is the vdev name and ends up in dev->data->name; tap_name is the
+ * requested host interface name and may be rewritten by tun_alloc() with
+ * the name the kernel actually assigned.
+ *
+ * Allocates the ethdev data and the private pmd_internals, opens the first
+ * TAP descriptor (stored as queue 0) and programs the MAC addresses.
+ *
+ * Returns 0 on success and -EINVAL on any failure; on failure the
+ * allocations, the port and the TAP descriptor are released again.
+ */
+static int
+eth_dev_tap_create(const char *name, char *tap_name)
+{
+	int numa_node = rte_socket_id();
+	struct rte_eth_dev *dev = NULL;
+	struct pmd_internals *pmd = NULL;
+	struct rte_eth_dev_data *data = NULL;
+	int i, fd = -1;
+
+	RTE_LOG(INFO, PMD,
+		"%s: Create TAP Ethernet device with %d queues on numa %u\n",
+		 name, RTE_PMD_TAP_MAX_QUEUES, rte_socket_id());
+
+	data = rte_zmalloc_socket(tap_name, sizeof(*data), 0, numa_node);
+	if (!data) {
+		RTE_LOG(INFO, PMD, "Failed to allocate data\n");
+		goto error_exit;
+	}
+
+	pmd = rte_zmalloc_socket(tap_name, sizeof(*pmd), 0, numa_node);
+	if (!pmd) {
+		RTE_LOG(INFO, PMD, "Unable to allocate internal struct\n");
+		goto error_exit;
+	}
+
+	/* The port is allocated under tap_name; data->name is set to the
+	 * vdev name further down.
+	 */
+	dev = rte_eth_dev_allocate(tap_name);
+	if (!dev) {
+		RTE_LOG(INFO, PMD, "Unable to allocate device struct\n");
+		goto error_exit;
+	}
+
+	snprintf(pmd->name, sizeof(pmd->name), "%s", tap_name);
+
+	pmd->nb_queues = RTE_PMD_TAP_MAX_QUEUES;
+
+	/* Setup some default values */
+	data->dev_private = pmd;
+	data->port_id = dev->data->port_id;
+	data->dev_flags = RTE_ETH_DEV_DETACHABLE;
+	data->kdrv = RTE_KDRV_NONE;
+	data->drv_name = drivername;
+	data->numa_node = numa_node;
+
+	data->dev_link = pmd_link;
+	data->mac_addrs = &pmd->eth_addr;
+	data->nb_rx_queues = pmd->nb_queues;
+	data->nb_tx_queues = pmd->nb_queues;
+
+	dev->data = data;
+	dev->dev_ops = &ops;
+	dev->driver = NULL;
+	dev->rx_pkt_burst = pmd_rx_burst;
+	dev->tx_pkt_burst = pmd_tx_burst;
+	snprintf(dev->data->name, sizeof(dev->data->name), "%s", name);
+
+	/* Create the first Tap device */
+	fd = tun_alloc(tap_name);
+	if (fd < 0) {
+		RTE_LOG(INFO, PMD, "tun_alloc() failed\n");
+		goto error_exit;
+	}
+
+	/* Presetup the fds to -1 as being not working */
+	for (i = 0; i < RTE_PMD_TAP_MAX_QUEUES; i++) {
+		pmd->fds[i] = -1;
+		pmd->rxq[i].fd = -1;
+		pmd->txq[i].fd = -1;
+	}
+
+	/* Take the TUN/TAP fd and place in the first location */
+	pmd->rxq[0].fd = fd;
+	pmd->txq[0].fd = fd;
+	pmd->fds[0] = fd;
+
+	if (pmd_mac_address(fd, dev, &pmd->eth_addr) < 0) {
+		RTE_LOG(INFO, PMD, "Unable to get MAC address\n");
+		goto error_exit;
+	}
+
+	return 0;
+
+error_exit:
+	RTE_PMD_DEBUG_TRACE("Unable to initialize %s\n", name);
+
+	/* Do not leak the TAP descriptor when a later step failed */
+	if (fd >= 0)
+		close(fd);
+
+	rte_free(data);
+	rte_free(pmd);
+
+	rte_eth_dev_release_port(dev);
+
+	return -EINVAL;
+}
+
+/* kvargs handler for the "iface" key: store the requested interface name
+ * in the buffer passed through extra_args. Without a value the default
+ * DEFAULT_TAP_NAME<unit> name is generated, using the unit number the probe
+ * routine already consumed (tap_unit - 1). Always returns 0.
+ */
+static int
+set_interface_name(const char *key __rte_unused,
+		   const char *value,
+		   void *extra_args)
+{
+	char *name = extra_args;
+
+	if (!value) {
+		snprintf(name, RTE_ETH_NAME_MAX_LEN - 1, "%s%d",
+			 DEFAULT_TAP_NAME, (tap_unit - 1));
+		return 0;
+	}
+
+	snprintf(name, RTE_ETH_NAME_MAX_LEN - 1, "%s", value);
+	return 0;
+}
+
+/* kvargs handler for the "speed" key: convert value with atoi() and store
+ * it in the int passed through extra_args, falling back to
+ * ETH_SPEED_NUM_10G when no value is given. Always returns 0.
+ */
+static int
+set_interface_speed(const char *key __rte_unused,
+		    const char *value,
+		    void *extra_args)
+{
+	int *speed = extra_args;
+
+	if (value)
+		*speed = atoi(value);
+	else
+		*speed = ETH_SPEED_NUM_10G;
+
+	return 0;
+}
+
+/* Open a TAP interface device.
+ *
+ * Picks the default interface name DEFAULT_TAP_NAME<tap_unit>, applies the
+ * optional "iface" and "speed" arguments found in params, then creates the
+ * port with eth_dev_tap_create().
+ *
+ * Returns 0 on success or a negative value on failure. Failures are
+ * detected with "ret < 0" because eth_dev_tap_create() reports -EINVAL
+ * rather than -1; on any failure the unit number taken above is given back.
+ */
+static int
+rte_pmd_tap_probe(const char *name, const char *params)
+{
+	int ret = 0;
+	struct rte_kvargs *kvlist = NULL;
+	int speed;
+	char tap_name[RTE_ETH_NAME_MAX_LEN];
+
+	speed = ETH_SPEED_NUM_10G;
+	snprintf(tap_name, sizeof(tap_name), "%s%d",
+		 DEFAULT_TAP_NAME, tap_unit++);
+
+	RTE_LOG(INFO, PMD, "Initializing pmd_tap for %s as %s\n",
+		name, tap_name);
+
+	if (params && (params[0] != '\0')) {
+		RTE_LOG(INFO, PMD, "paramaters (%s)\n", params);
+
+		kvlist = rte_kvargs_parse(params, valid_arguments);
+		if (kvlist) {
+			if (rte_kvargs_count(kvlist, ETH_TAP_SPEED_ARG) == 1) {
+				ret = rte_kvargs_process(kvlist,
+							 ETH_TAP_SPEED_ARG,
+							 &set_interface_speed,
+							 &speed);
+				if (ret < 0)
+					goto leave;
+			}
+
+			if (rte_kvargs_count(kvlist, ETH_TAP_IFACE_ARG) == 1) {
+				ret = rte_kvargs_process(kvlist,
+							 ETH_TAP_IFACE_ARG,
+							 &set_interface_name,
+							 tap_name);
+				if (ret < 0)
+					goto leave;
+			}
+		}
+	}
+	pmd_link.link_speed = speed;
+
+	ret = eth_dev_tap_create(name, tap_name);
+
+leave:
+	if (ret < 0) {
+		RTE_LOG(INFO, PMD, "Failed to create pmd for %s as %s\n",
+			name, tap_name);
+		tap_unit--;		/* Restore the unit number */
+	}
+	rte_kvargs_free(kvlist);
+
+	return ret;
+}
+
+/* Detach a TAP device.
+ *
+ * Looks the port up by name; when it exists, every descriptor still listed
+ * in fds[] is closed, the private data and ethdev data are freed and the
+ * port is released. Returns 0 in all cases, including an unknown name.
+ */
+static int
+rte_pmd_tap_remove(const char *name)
+{
+	struct rte_eth_dev *eth_dev;
+	struct pmd_internals *pmd;
+	int q;
+
+	RTE_LOG(INFO, PMD, "Closing TUN/TAP Ethernet device on numa %u\n",
+		rte_socket_id());
+
+	/* nothing to do when no ethdev carries this name */
+	eth_dev = rte_eth_dev_allocated(name);
+	if (eth_dev == NULL)
+		return 0;
+
+	pmd = eth_dev->data->dev_private;
+	for (q = 0; q < pmd->nb_queues; q++) {
+		if (pmd->fds[q] == -1)
+			continue;
+		close(pmd->fds[q]);
+	}
+
+	rte_free(pmd);
+	rte_free(eth_dev->data);
+
+	rte_eth_dev_release_port(eth_dev);
+
+	return 0;
+}
+
+static struct rte_vdev_driver pmd_tap_drv = {	/* probe/remove entry points registered below as net_tap */
+	.probe = rte_pmd_tap_probe,
+	.remove = rte_pmd_tap_remove,
+};
+RTE_PMD_REGISTER_VDEV(net_tap, pmd_tap_drv);
+RTE_PMD_REGISTER_PARAM_STRING(net_tap, "iface=<string>,speed=N");	/* the two keys listed in valid_arguments */
diff --git a/drivers/net/tap/rte_pmd_tap_version.map b/drivers/net/tap/rte_pmd_tap_version.map
new file mode 100644
index 0000000..61463bf
--- /dev/null
+++ b/drivers/net/tap/rte_pmd_tap_version.map
@@ -0,0 +1,4 @@
+DPDK_16.11 {
+
+	local: *;
+};
diff --git a/mk/rte.app.mk b/mk/rte.app.mk
index f75f0e2..02c32ae 100644
--- a/mk/rte.app.mk
+++ b/mk/rte.app.mk
@@ -124,6 +124,7 @@ _LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_PCAP)       += -lrte_pmd_pcap -lpcap
 _LDLIBS-$(CONFIG_RTE_LIBRTE_QEDE_PMD)       += -lrte_pmd_qede
 _LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_RING)       += -lrte_pmd_ring
 _LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_SZEDATA2)   += -lrte_pmd_szedata2 -lsze2
+_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_TAP)        += -lrte_pmd_tap
 _LDLIBS-$(CONFIG_RTE_LIBRTE_THUNDERX_NICVF_PMD) += -lrte_pmd_thunderx_nicvf -lm
 _LDLIBS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD)     += -lrte_pmd_virtio
 ifeq ($(CONFIG_RTE_LIBRTE_VHOST),y)
-- 
2.8.0.GIT

^ permalink raw reply related	[flat|nested] 59+ messages in thread

* Re: [PATCH v11] drivers/net:new TUN/TAP device PMD
  2016-12-09 19:05       ` [PATCH v11] " Keith Wiles
@ 2016-12-12 12:39         ` Vasily Philipov
  0 siblings, 0 replies; 59+ messages in thread
From: Vasily Philipov @ 2016-12-12 12:39 UTC (permalink / raw)
  To: Keith Wiles, dev; +Cc: pmatilai, yuanhan.liu, ferruh.yigit, john.mcnamara



> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Keith Wiles
> Sent: Friday, December 09, 2016 21:05
> To: dev@dpdk.org
> Cc: pmatilai@redhat.com; yuanhan.liu@linux.intel.com;
> ferruh.yigit@intel.com; john.mcnamara@intel.com
> Subject: [dpdk-dev] [PATCH v11] drivers/net:new TUN/TAP device PMD
> 
> The PMD allows for DPDK and the host to communicate using a raw device
> interface on the host and in the DPDK application. The device created is a Tap
> device with a L2 packet header.
> 
> v11- Add the tap.rst to the nic/index.rst file
> v10- Change the string name used to allow for multiple devices.
> v9 - Fix up the docs to use correct syntax
> v8 - Fix issue with tap_tx_queue_setup() not return zero on success.
> v7 - Reword the comment in common_base and fix the data->name issue
> v6 - fixed the checkpatch issues
> v5 - merge in changes from list review see related emails
>      fixed many minor edits
> v4 - merge with latest driver changes
> v3 - fix includes by removing ifdef for other type besides Linux
>      Fix the copyright notice in the Makefile
> v2 - merge all of the patches into one patch
>      Fix a typo on naming the tap device
>      Update the maintainers list
> 
> Signed-off-by: Keith Wiles <keith.wiles@intel.com>
> ---
>  MAINTAINERS                             |   5 +
>  config/common_base                      |   9 +
>  config/common_linuxapp                  |   1 +
>  doc/guides/nics/index.rst               |   1 +
>  doc/guides/nics/tap.rst                 | 136 ++++++
>  drivers/net/Makefile                    |   1 +
>  drivers/net/tap/Makefile                |  57 +++
>  drivers/net/tap/rte_eth_tap.c           | 765
> ++++++++++++++++++++++++++++++++
>  drivers/net/tap/rte_pmd_tap_version.map |   4 +
>  mk/rte.app.mk                           |   1 +
>  10 files changed, 980 insertions(+)

Tested-by: Vasily Philipov <vasilyf@mellanox.com>

^ permalink raw reply	[flat|nested] 59+ messages in thread

* [PATCH v12] net/tap: new TUN/TAP device PMD
  2016-10-13 22:03     ` [PATCH v9] " Keith Wiles
                         ` (4 preceding siblings ...)
  2016-12-09 19:05       ` [PATCH v11] " Keith Wiles
@ 2016-12-12 14:24       ` Keith Wiles
  2016-12-12 14:38       ` Keith Wiles
  6 siblings, 0 replies; 59+ messages in thread
From: Keith Wiles @ 2016-12-12 14:24 UTC (permalink / raw)
  To: dev; +Cc: pmatilai, yuanhan.liu, ferruh.yigit, john.mcnamara

The PMD allows for DPDK and the host to communicate using a raw
device interface on the host and in the DPDK application. The device
created is a Tap device with a L2 packet header.

v12- Fixup minor changes for driver_name and version number
v11- Add the tap.rst to the nic/index.rst file
v10- Change the string name used to allow for multiple devices.
v9 - Fix up the docs to use correct syntax
v8 - Fix issue with tap_tx_queue_setup() not return zero on success.
v7 - Reword the comment in common_base and fix the data->name issue
v6 - fixed the checkpatch issues
v5 - merge in changes from list review see related emails
     fixed many minor edits
v4 - merge with latest driver changes
v3 - fix includes by removing ifdef for other type besides Linux
     Fix the copyright notice in the Makefile
v2 - merge all of the patches into one patch
     Fix a typo on naming the tap device
     Update the maintainers list

Signed-off-by: Keith Wiles <keith.wiles@intel.com>
---
 MAINTAINERS                             |   5 +
 config/common_base                      |   9 +
 config/common_linuxapp                  |   1 +
 doc/guides/nics/index.rst               |   1 +
 doc/guides/nics/tap.rst                 | 136 ++++++
 drivers/net/Makefile                    |   1 +
 drivers/net/tap/Makefile                |  57 +++
 drivers/net/tap/rte_eth_tap.c           | 765 ++++++++++++++++++++++++++++++++
 drivers/net/tap/rte_pmd_tap_version.map |   4 +
 mk/rte.app.mk                           |   1 +
 10 files changed, 980 insertions(+)
 create mode 100644 doc/guides/nics/tap.rst
 create mode 100644 drivers/net/tap/Makefile
 create mode 100644 drivers/net/tap/rte_eth_tap.c
 create mode 100644 drivers/net/tap/rte_pmd_tap_version.map

diff --git a/MAINTAINERS b/MAINTAINERS
index 26d9590..842fb6d 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -398,6 +398,11 @@ F: doc/guides/nics/pcap_ring.rst
 F: app/test/test_pmd_ring.c
 F: app/test/test_pmd_ring_perf.c
 
+Tap PMD
+M: Keith Wiles <keith.wiles@intel.com>
+F: drivers/net/tap
+F: doc/guides/nics/tap.rst
+
 Null Networking PMD
 M: Tetsuya Mukawa <mtetsuyah@gmail.com>
 F: drivers/net/null/
diff --git a/config/common_base b/config/common_base
index 652a839..eb51cdb 100644
--- a/config/common_base
+++ b/config/common_base
@@ -590,3 +590,12 @@ CONFIG_RTE_APP_TEST_RESOURCE_TAR=n
 CONFIG_RTE_TEST_PMD=y
 CONFIG_RTE_TEST_PMD_RECORD_CORE_CYCLES=n
 CONFIG_RTE_TEST_PMD_RECORD_BURST_STATS=n
+
+#
+# Compile the TAP PMD
+#
+# The TAP PMD is currently only built for Linux and the
+# option is enabled by default in common_linuxapp file,
+# set to 'n' in the common_base file.
+#
+CONFIG_RTE_LIBRTE_PMD_TAP=n
diff --git a/config/common_linuxapp b/config/common_linuxapp
index 2483dfa..782b503 100644
--- a/config/common_linuxapp
+++ b/config/common_linuxapp
@@ -44,3 +44,4 @@ CONFIG_RTE_LIBRTE_PMD_VHOST=y
 CONFIG_RTE_LIBRTE_PMD_AF_PACKET=y
 CONFIG_RTE_LIBRTE_POWER=y
 CONFIG_RTE_VIRTIO_USER=y
+CONFIG_RTE_LIBRTE_PMD_TAP=y
diff --git a/doc/guides/nics/index.rst b/doc/guides/nics/index.rst
index 92d56a5..af92529 100644
--- a/doc/guides/nics/index.rst
+++ b/doc/guides/nics/index.rst
@@ -51,6 +51,7 @@ Network Interface Controller Drivers
     nfp
     qede
     szedata2
+    tap
     thunderx
     virtio
     vhost
diff --git a/doc/guides/nics/tap.rst b/doc/guides/nics/tap.rst
new file mode 100644
index 0000000..622b9e7
--- /dev/null
+++ b/doc/guides/nics/tap.rst
@@ -0,0 +1,136 @@
+..  BSD LICENSE
+    Copyright(c) 2016 Intel Corporation. All rights reserved.
+    All rights reserved.
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions
+    are met:
+
+    * Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+    notice, this list of conditions and the following disclaimer in
+    the documentation and/or other materials provided with the
+    distribution.
+    * Neither the name of Intel Corporation nor the names of its
+    contributors may be used to endorse or promote products derived
+    from this software without specific prior written permission.
+
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+Tun/Tap Poll Mode Driver
+========================
+
+The ``rte_eth_tap.c`` PMD creates a device using TUN/TAP interfaces on the
+local host. The PMD allows for DPDK and the host to communicate using a raw
+device interface on the host and in the DPDK application.
+
+The device created is a TAP device, which sends/receives packet in a raw
+format with a L2 header. The usage for a TAP PMD is for connectivity to the
+local host using a TAP interface. When the TAP PMD is initialized it will
+create a number of tap devices in the host accessed via ``ifconfig -a`` or
+``ip`` command. These commands can be used to configure and query the virtual
+device.
+
+These TAP interfaces can be used with Wireshark or tcpdump or Pktgen-DPDK
+along with being able to be used as a network connection to the DPDK
+application. The method to enable one or more interfaces is to use the
+``--vdev=net_tap`` option on the DPDK application command line. Each
+``--vdev=net_tap`` option given will create an interface named dtap0, dtap1,
+and so on.
+
+The interface name can be changed by adding the ``iface=`` argument, for example::
+
+   --vdev=net_tap,iface=foo0 --vdev=net_tap,iface=foo1, ...
+
+Also the speed of the interface can be changed from 10G to whatever number
+needed, but the interface does not enforce that speed, for example::
+
+   --vdev=net_tap,iface=foo0,speed=25000
+
+After the DPDK application is started you can send and receive packets on the
+interface using the standard rx_burst/tx_burst APIs in DPDK. From the host
+point of view you can use any host tool like tcpdump, Wireshark, ping, Pktgen
+and others to communicate with the DPDK application. The DPDK application may
+not understand network protocols like IPv4/6, UDP or TCP unless the
+application has been written to understand these protocols.
+
+If you need the interface to act as a real network interface, meaning it is up
+and has a valid IP address, then you can do this with the following commands::
+
+   sudo ip link set dtap0 up; sudo ip addr add 192.168.0.250/24 dev dtap0
+   sudo ip link set dtap1 up; sudo ip addr add 192.168.1.250/24 dev dtap1
+
+Please change the IP addresses as you see fit.
+
+If routing is enabled on the host you can also communicate with the DPDK App
+over the internet via a standard socket layer application as long as you
+account for the protocol handling in the application.
+
+If you have a Network Stack in your DPDK application or something like it you
+can utilize that stack to handle the network protocols. Plus you would be able
+to address the interface using an IP address assigned to the internal
+interface.
+
+Example
+-------
+
+The following is a simple example of using the TUN/TAP PMD with the Pktgen
+packet generator. It requires that the ``socat`` utility is installed on the
+test system.
+
+Build DPDK, then pull down Pktgen and build pktgen using the DPDK SDK/Target
+used to build the dpdk you pulled down.
+
+Run pktgen from the pktgen directory in a terminal with a commandline like the
+following::
+
+    sudo ./app/app/x86_64-native-linuxapp-gcc/app/pktgen -l 1-5 -n 4        \
+     --proc-type auto --log-level 8 --socket-mem 512,512 --file-prefix pg   \
+     --vdev=net_tap --vdev=net_tap -b 05:00.0 -b 05:00.1                    \
+     -b 04:00.0 -b 04:00.1 -b 04:00.2 -b 04:00.3                            \
+     -b 81:00.0 -b 81:00.1 -b 81:00.2 -b 81:00.3                            \
+     -b 82:00.0 -b 83:00.0 -- -T -P -m [2:3].0 -m [4:5].1                   \
+     -f themes/black-yellow.theme
+
+.. Note::
+
+   Change the ``-b`` options to blacklist all of your physical ports. The
+   command line above is all one line.
+
+   Also, ``-f themes/black-yellow.theme`` is optional if the default colors
+   work on your system configuration. See the Pktgen docs for more
+   information.
+
+Verify with the ``ifconfig -a`` command in a different xterm window; you should
+see that the ``dtap0`` and ``dtap1`` interfaces have been created.
+
+Next set the links for the two interfaces to up via the commands below::
+
+    sudo ip link set dtap0 up; sudo ip addr add 192.168.0.250/24 dev dtap0
+    sudo ip link set dtap1 up; sudo ip addr add 192.168.1.250/24 dev dtap1
+
+Then use socat to create a loopback for the two interfaces::
+
+    sudo socat interface:dtap0 interface:dtap1
+
+Then on the Pktgen command line interface you can start sending packets using
+the commands ``start 0`` and ``start 1`` or you can start both at the same
+time with ``start all``. The command ``str`` is an alias for ``start all`` and
+``stp`` is an alias for ``stop all``.
+
+While running you should see the 64 byte counters increasing to verify the
+traffic is being looped back. You can use ``set all size XXX`` to change the
+size of the packets after you stop the traffic. Use the pktgen ``help``
+command to see a list of all commands. You can also use the ``-f`` option to
+load commands at startup.
diff --git a/drivers/net/Makefile b/drivers/net/Makefile
index bc93230..e366a85 100644
--- a/drivers/net/Makefile
+++ b/drivers/net/Makefile
@@ -51,6 +51,7 @@ DIRS-$(CONFIG_RTE_LIBRTE_PMD_PCAP) += pcap
 DIRS-$(CONFIG_RTE_LIBRTE_QEDE_PMD) += qede
 DIRS-$(CONFIG_RTE_LIBRTE_PMD_RING) += ring
 DIRS-$(CONFIG_RTE_LIBRTE_PMD_SZEDATA2) += szedata2
+DIRS-$(CONFIG_RTE_LIBRTE_PMD_TAP) += tap
 DIRS-$(CONFIG_RTE_LIBRTE_THUNDERX_NICVF_PMD) += thunderx
 DIRS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio
 DIRS-$(CONFIG_RTE_LIBRTE_VMXNET3_PMD) += vmxnet3
diff --git a/drivers/net/tap/Makefile b/drivers/net/tap/Makefile
new file mode 100644
index 0000000..e18f30c
--- /dev/null
+++ b/drivers/net/tap/Makefile
@@ -0,0 +1,57 @@
+#   BSD LICENSE
+#
+#   Copyright(c) 2016 Intel Corporation. All rights reserved.
+#
+#   Redistribution and use in source and binary forms, with or without
+#   modification, are permitted provided that the following conditions
+#   are met:
+#
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice, this list of conditions and the following disclaimer in
+#       the documentation and/or other materials provided with the
+#       distribution.
+#     * Neither the name of Intel Corporation nor the names of its
+#       contributors may be used to endorse or promote products derived
+#       from this software without specific prior written permission.
+#
+#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+#   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+#   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+#   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+#   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+#
+# library name
+#
+LIB = librte_pmd_tap.a
+
+EXPORT_MAP := rte_pmd_tap_version.map
+
+LIBABIVER := 1
+
+CFLAGS += -O3
+CFLAGS += $(WERROR_FLAGS)
+
+#
+# all source are stored in SRCS-y
+#
+SRCS-$(CONFIG_RTE_LIBRTE_PMD_TAP) += rte_eth_tap.c
+
+# this lib depends upon:
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_TAP) += lib/librte_eal
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_TAP) += lib/librte_mbuf
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_TAP) += lib/librte_mempool
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_TAP) += lib/librte_ether
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_TAP) += lib/librte_kvargs
+
+include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/drivers/net/tap/rte_eth_tap.c b/drivers/net/tap/rte_eth_tap.c
new file mode 100644
index 0000000..d5e2fc3
--- /dev/null
+++ b/drivers/net/tap/rte_eth_tap.c
@@ -0,0 +1,765 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2016 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <rte_mbuf.h>
+#include <rte_ethdev.h>
+#include <rte_malloc.h>
+#include <rte_vdev.h>
+#include <rte_kvargs.h>
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/socket.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <unistd.h>
+#include <poll.h>
+#include <arpa/inet.h>
+#include <linux/if.h>
+#include <linux/if_tun.h>
+#include <linux/if_ether.h>
+#include <fcntl.h>
+
+/* Linux based path to the TUN device */
+#define TUN_TAP_DEV_PATH        "/dev/net/tun"
+#define DEFAULT_TAP_NAME        "dtap"
+
+#define ETH_TAP_IFACE_ARG       "iface"
+#define ETH_TAP_SPEED_ARG       "speed"
+
+#define RTE_PMD_TAP_MAX_QUEUES	16
+
+static const char *valid_arguments[] = {
+	ETH_TAP_IFACE_ARG,
+	ETH_TAP_SPEED_ARG,
+	NULL
+};
+
+static const char *drivername = "Tap PMD";
+static int tap_unit;	/* Next unit number appended to DEFAULT_TAP_NAME */
+
+/* Default link settings copied into each new port. link_speed is overwritten
+ * by the probe routine before a port is created.
+ *
+ * link_autoneg takes ETH_LINK_FIXED/ETH_LINK_AUTONEG; ETH_LINK_SPEED_AUTONEG
+ * is a flag for the link_speeds bitmap and does not belong in this field.
+ */
+static struct rte_eth_link pmd_link = {
+	.link_speed = ETH_SPEED_NUM_10G,
+	.link_duplex = ETH_LINK_FULL_DUPLEX,
+	.link_status = ETH_LINK_DOWN,
+	.link_autoneg = ETH_LINK_FIXED
+};
+
+struct pkt_stats {
+	uint64_t opackets;		/* Number of output packets */
+	uint64_t ipackets;		/* Number of input packets */
+	uint64_t obytes;		/* Number of bytes on output */
+	uint64_t ibytes;		/* Number of bytes on input */
+	uint64_t errs;			/* Burst packets the TX path did not send */
+};
+
+struct rx_queue {
+	struct rte_mempool *mp;		/* Mempool for RX packets */
+	uint16_t in_port;		/* Port ID */
+	int fd;				/* TAP file descriptor read by RX burst */
+
+	struct pkt_stats stats;		/* Stats for this RX queue */
+};
+
+struct tx_queue {
+	int fd;				/* TAP file descriptor written by TX burst */
+	struct pkt_stats stats;		/* Stats for this TX queue */
+};
+
+struct pmd_internals {
+	char name[RTE_ETH_NAME_MAX_LEN];	/* Internal Tap device name */
+	uint16_t nb_queues;		/* Number of queues supported */
+	struct ether_addr eth_addr;	/* Mac address of the device port */
+
+	int if_index;			/* IF_INDEX for the port */
+	int fds[RTE_PMD_TAP_MAX_QUEUES]; /* Per-queue descriptors, -1 if unset */
+
+	struct rx_queue rxq[RTE_PMD_TAP_MAX_QUEUES];	/* List of RX queues */
+	struct tx_queue txq[RTE_PMD_TAP_MAX_QUEUES];	/* List of TX queues */
+};
+
+/* Tun/Tap allocation routine
+ *
+ * Opens the TUN/TAP device node and attaches a TAP interface (IFF_TAP with
+ * IFF_NO_PI) to the new file descriptor, which is then set non-blocking.
+ *
+ * name is the name of the interface to use; pass NULL or an empty string to
+ * take the host supplied name. When name is non-NULL and the host reports a
+ * different name, that name is written back into name, so the buffer must be
+ * able to hold RTE_ETH_NAME_MAX_LEN - 1 bytes.
+ *
+ * Returns the file descriptor on success or -1 on failure.
+ */
+static int
+tun_alloc(char *name)
+{
+	struct ifreq ifr;
+	unsigned int features;
+	int fd;
+
+	memset(&ifr, 0, sizeof(struct ifreq));
+
+	ifr.ifr_flags = IFF_TAP | IFF_NO_PI;
+	/* snprintf() always NUL-terminates, whereas strncpy() leaves ifr_name
+	 * unterminated when name is IFNAMSIZ bytes or longer.
+	 */
+	if (name && name[0])
+		snprintf(ifr.ifr_name, IFNAMSIZ, "%s", name);
+
+	fd = open(TUN_TAP_DEV_PATH, O_RDWR);
+	if (fd < 0) {
+		RTE_LOG(ERR, PMD, "Unable to create TAP interface\n");
+		goto error;
+	}
+
+	/* Grab the TUN features to verify we can work */
+	if (ioctl(fd, TUNGETFEATURES, &features) < 0) {
+		RTE_LOG(ERR, PMD, "Unable to get TUN/TAP features\n");
+		goto error;
+	}
+	RTE_LOG(DEBUG, PMD, "TUN/TAP Features %08x\n", features);
+
+	if (!(features & IFF_MULTI_QUEUE) && (RTE_PMD_TAP_MAX_QUEUES > 1)) {
+		RTE_LOG(DEBUG, PMD, "TUN/TAP device only one queue\n");
+		goto error;
+	} else if ((features & IFF_ONE_QUEUE) &&
+			(RTE_PMD_TAP_MAX_QUEUES == 1)) {
+		ifr.ifr_flags |= IFF_ONE_QUEUE;
+		RTE_LOG(DEBUG, PMD, "Single queue only support\n");
+	} else {
+		ifr.ifr_flags |= IFF_MULTI_QUEUE;
+		RTE_LOG(DEBUG, PMD, "Multi-queue support for %d queues\n",
+			RTE_PMD_TAP_MAX_QUEUES);
+	}
+
+	/* Set the TUN/TAP configuration and get the name if needed */
+	if (ioctl(fd, TUNSETIFF, (void *)&ifr) < 0) {
+		RTE_LOG(ERR, PMD, "Unable to set TUNSETIFF for %s\n",
+			ifr.ifr_name);
+		perror("TUNSETIFF");
+		goto error;
+	}
+
+	/* Always set the file descriptor to non-blocking */
+	if (fcntl(fd, F_SETFL, O_NONBLOCK) < 0) {
+		RTE_LOG(ERR, PMD, "Unable to set to nonblocking\n");
+		perror("F_SETFL, NONBLOCK");
+		goto error;
+	}
+
+	/* If the host picked a different name, hand it back to the caller */
+	if (name && strcmp(name, ifr.ifr_name))
+		snprintf(name, RTE_ETH_NAME_MAX_LEN - 1, "%s", ifr.ifr_name);
+
+	return fd;
+
+error:
+	/* Zero is a valid descriptor; only a negative fd means "not open" */
+	if (fd >= 0)
+		close(fd);
+	return -1;
+}
+
+/* Receive callback: read up to nb_pkts frames from the queue's file
+ * descriptor, one freshly allocated mbuf per frame, and store them in bufs.
+ * Stops early when no mbuf can be allocated or read() returns no data.
+ * Returns the number of mbufs placed in bufs.
+ */
+static uint16_t
+pmd_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
+{
+	struct rx_queue *rxq = queue;
+	unsigned long bytes = 0;
+	uint16_t count = 0;
+
+	while (count < nb_pkts) {
+		struct rte_mbuf *pkt;
+		int nread;
+
+		/* allocate the next mbuf */
+		pkt = rte_pktmbuf_alloc(rxq->mp);
+		if (unlikely(pkt == NULL)) {
+			RTE_LOG(WARNING, PMD, "Unable to allocate mbuf\n");
+			break;
+		}
+
+		nread = read(rxq->fd, rte_pktmbuf_mtod(pkt, char *),
+			     rte_pktmbuf_tailroom(pkt));
+		if (nread <= 0) {
+			/* nothing was read: give the unused mbuf back */
+			rte_pktmbuf_free(pkt);
+			break;
+		}
+
+		pkt->data_len = nread;
+		pkt->pkt_len = nread;
+		pkt->port = rxq->in_port;
+
+		/* account for the receive frame */
+		bufs[count] = pkt;
+		count++;
+		bytes += pkt->pkt_len;
+	}
+
+	rxq->stats.ipackets += count;
+	rxq->stats.ibytes += bytes;
+
+	return count;
+}
+
+/* Transmit callback: write up to nb_pkts packets from bufs to the queue's
+ * file descriptor and free every mbuf that was written. Packets that were
+ * not written are added to the queue's error counter. Returns the number of
+ * packets sent.
+ */
+static uint16_t
+pmd_tx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
+{
+	struct tx_queue *txq = queue;
+	struct pollfd pfd;
+	unsigned long sent_bytes = 0;
+	uint16_t sent = 0;
+	int attempt;
+
+	if (unlikely(nb_pkts == 0))
+		return 0;
+
+	pfd.fd = txq->fd;
+	pfd.events = POLLOUT;
+
+	for (attempt = 0; attempt < nb_pkts; attempt++) {
+		struct rte_mbuf *pkt;
+
+		/* zero timeout: only test whether the descriptor is ready */
+		if (poll(&pfd, 1, 0) <= 0)
+			break;
+
+		if (!(pfd.revents & POLLOUT))
+			continue;
+
+		/* copy the tx frame data */
+		pkt = bufs[sent];
+		if (write(pfd.fd, rte_pktmbuf_mtod(pkt, void*),
+			  rte_pktmbuf_pkt_len(pkt)) <= 0)
+			break;
+
+		sent++;
+		sent_bytes += pkt->pkt_len;
+		rte_pktmbuf_free(pkt);
+	}
+
+	txq->stats.opackets += sent;
+	txq->stats.errs += nb_pkts - sent;
+	txq->stats.obytes += sent_bytes;
+
+	return sent;
+}
+
+static int
+tap_dev_start(struct rte_eth_dev *dev)
+{
+	/* Force the Link up; starting the port does nothing else */
+	dev->data->dev_link.link_status = ETH_LINK_UP;
+
+	return 0;
+}
+
+/* This function gets called when the current port gets stopped.
+ *
+ * Closes every per-queue file descriptor recorded in fds[] and marks the
+ * link down. Each closed descriptor is also cleared from the matching RX and
+ * TX queue so that a stale value is never read, written or closed again.
+ */
+static void
+tap_dev_stop(struct rte_eth_dev *dev)
+{
+	int i;
+	struct pmd_internals *internals = dev->data->dev_private;
+
+	for (i = 0; i < internals->nb_queues; i++) {
+		if (internals->fds[i] == -1)
+			continue;
+
+		close(internals->fds[i]);
+
+		/* RX and TX queue i were handed this same descriptor, so
+		 * forget every copy of it.
+		 */
+		internals->fds[i] = -1;
+		internals->rxq[i].fd = -1;
+		internals->txq[i].fd = -1;
+	}
+
+	dev->data->dev_link.link_status = ETH_LINK_DOWN;
+}
+
+static int
+tap_dev_configure(struct rte_eth_dev *dev __rte_unused)
+{
+	return 0;	/* nothing to configure; always reports success */
+}
+
+/* Report the static capabilities of the port in dev_info: one MAC address,
+ * and as many RX/TX queues as the PMD was created with.
+ */
+static void
+tap_dev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
+{
+	const struct pmd_internals *pmd = dev->data->dev_private;
+
+	/* identification */
+	dev_info->driver_name = drivername;
+	dev_info->pci_dev = NULL;
+	dev_info->if_index = pmd->if_index;
+
+	/* limits */
+	dev_info->max_mac_addrs = 1;
+	dev_info->max_rx_queues = pmd->nb_queues;
+	dev_info->max_tx_queues = pmd->nb_queues;
+	dev_info->max_rx_pktlen = (uint32_t)ETHER_MAX_VLAN_FRAME_LEN;
+	dev_info->min_rx_bufsize = 0;
+}
+
+/* Copy the per-queue counters into tap_stats and fill in the port totals.
+ * At most RTE_ETHDEV_QUEUE_STAT_CNTRS queues are reported.
+ */
+static void
+tap_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *tap_stats)
+{
+	const struct pmd_internals *pmd = dev->data->dev_private;
+	unsigned long in_pkts = 0, in_bytes = 0;
+	unsigned long out_pkts = 0, out_bytes = 0, out_errs = 0;
+	unsigned int q, nb_q;
+
+	nb_q = pmd->nb_queues;
+	if (nb_q > RTE_ETHDEV_QUEUE_STAT_CNTRS)
+		nb_q = RTE_ETHDEV_QUEUE_STAT_CNTRS;
+
+	for (q = 0; q < nb_q; q++) {
+		const struct pkt_stats *rx = &pmd->rxq[q].stats;
+		const struct pkt_stats *tx = &pmd->txq[q].stats;
+
+		/* receive side */
+		tap_stats->q_ipackets[q] = rx->ipackets;
+		tap_stats->q_ibytes[q] = rx->ibytes;
+		in_pkts += tap_stats->q_ipackets[q];
+		in_bytes += tap_stats->q_ibytes[q];
+
+		/* transmit side */
+		tap_stats->q_opackets[q] = tx->opackets;
+		tap_stats->q_errors[q] = tx->errs;
+		tap_stats->q_obytes[q] = tx->obytes;
+		out_pkts += tap_stats->q_opackets[q];
+		out_errs += tap_stats->q_errors[q];
+		out_bytes += tap_stats->q_obytes[q];
+	}
+
+	tap_stats->ipackets = in_pkts;
+	tap_stats->ibytes = in_bytes;
+	tap_stats->opackets = out_pkts;
+	tap_stats->oerrors = out_errs;
+	tap_stats->obytes = out_bytes;
+}
+
+static void
+tap_stats_reset(struct rte_eth_dev *dev)
+{
+	int i;
+	struct pmd_internals *pmd = dev->data->dev_private;
+
+	for (i = 0; i < pmd->nb_queues; i++) {
+		pmd->rxq[i].stats.ipackets = 0;
+		pmd->rxq[i].stats.ibytes = 0;
+	}
+
+	for (i = 0; i < pmd->nb_queues; i++) {
+		pmd->txq[i].stats.opackets = 0;
+		pmd->txq[i].stats.errs = 0;
+		pmd->txq[i].stats.obytes = 0;
+	}
+}
+
+static void
+tap_dev_close(struct rte_eth_dev *dev __rte_unused)
+{	/* intentionally empty: no work is done at close time */
+}
+
+/* Release an RX queue: close its file descriptor, if it has one, and mark
+ * the queue as having none.
+ */
+static void
+tap_rx_queue_release(void *queue)
+{
+	struct rx_queue *rxq = queue;
+
+	if (rxq == NULL || rxq->fd <= 0)
+		return;
+
+	close(rxq->fd);
+	rxq->fd = -1;
+}
+
+/* Release a TX queue: close its file descriptor, if it has one, and mark
+ * the queue as having none.
+ */
+static void
+tap_tx_queue_release(void *queue)
+{
+	struct tx_queue *txq = queue;
+
+	if (txq == NULL || txq->fd <= 0)
+		return;
+
+	close(txq->fd);
+	txq->fd = -1;
+}
+
+static int
+tap_link_update(struct rte_eth_dev *dev __rte_unused,
+		int wait_to_complete __rte_unused)
+{
+	return 0;	/* link state is left untouched here */
+}
+
+/* Attach queue pair qid to the device and make sure it has a TAP file
+ * descriptor. A descriptor already held by the RX queue is preferred, then
+ * one held by the TX queue; otherwise a new one is allocated. RX and TX end
+ * up sharing the same descriptor.
+ *
+ * Returns the descriptor, or -1 when allocation fails.
+ */
+static int
+tap_setup_queue(struct rte_eth_dev *dev,
+		struct pmd_internals *internals,
+		uint16_t qid)
+{
+	struct rx_queue *rxq = &internals->rxq[qid];
+	struct tx_queue *txq = &internals->txq[qid];
+	int fd = (rxq->fd >= 0) ? rxq->fd : txq->fd;
+
+	if (fd < 0) {
+		RTE_LOG(INFO, PMD, "Add queue to TAP %s for qid %d\n",
+			dev->data->name, qid);
+		fd = tun_alloc(dev->data->name);
+	}
+	if (fd < 0) {
+		RTE_LOG(ERR, PMD, "tun_alloc(%s) failed\n",
+			dev->data->name);
+		return -1;
+	}
+
+	rxq->fd = fd;
+	txq->fd = fd;
+
+	dev->data->rx_queues[qid] = rxq;
+	dev->data->tx_queues[qid] = txq;
+
+	return fd;
+}
+
+/* Set up RX queue rx_queue_id: record the mempool and port id on the queue
+ * and make sure the queue has a TAP file descriptor.
+ *
+ * Returns 0 on success, -1 for an out-of-range queue id, a NULL mempool or a
+ * failed descriptor allocation, and -ENOMEM when the mempool's mbufs cannot
+ * hold ETH_FRAME_LEN bytes after the headroom.
+ */
+static int
+tap_rx_queue_setup(struct rte_eth_dev *dev,
+		   uint16_t rx_queue_id,
+		   uint16_t nb_rx_desc __rte_unused,
+		   unsigned int socket_id __rte_unused,
+		   const struct rte_eth_rxconf *rx_conf __rte_unused,
+		   struct rte_mempool *mp)
+{
+	struct pmd_internals *internals = dev->data->dev_private;
+	uint16_t data_room;
+	uint16_t buf_size;
+	int fd;
+
+	if ((rx_queue_id >= internals->nb_queues) || !mp) {
+		RTE_LOG(ERR, PMD, "nb_queues %d mp %p\n",
+			internals->nb_queues, (void *)mp);
+		return -1;
+	}
+
+	/* Now get the space available for data in the mbuf. Clamp at zero: a
+	 * data room smaller than the headroom would otherwise wrap around to
+	 * a huge uint16_t value and slip past the size check below.
+	 */
+	data_room = rte_pktmbuf_data_room_size(mp);
+	buf_size = (data_room > RTE_PKTMBUF_HEADROOM) ?
+		(uint16_t)(data_room - RTE_PKTMBUF_HEADROOM) : 0;
+
+	if (buf_size < ETH_FRAME_LEN) {
+		RTE_LOG(ERR, PMD,
+			"%s: %d bytes will not fit in mbuf (%d bytes)\n",
+			dev->data->name, ETH_FRAME_LEN, buf_size);
+		return -ENOMEM;
+	}
+
+	/* Only touch the queue once the mempool has been validated */
+	internals->rxq[rx_queue_id].mp = mp;
+	internals->rxq[rx_queue_id].in_port = dev->data->port_id;
+
+	fd = tap_setup_queue(dev, internals, rx_queue_id);
+	if (fd == -1)
+		return -1;
+
+	internals->fds[rx_queue_id] = fd;
+	RTE_LOG(INFO, PMD, "RX TAP device name %s, qid %d on fd %d\n",
+		dev->data->name, rx_queue_id, internals->rxq[rx_queue_id].fd);
+
+	return 0;
+}
+
+/* Set up TX queue tx_queue_id by making sure it has a TAP file descriptor.
+ * Returns 0 on success, -1 for an out-of-range queue id or a failed
+ * descriptor allocation.
+ */
+static int
+tap_tx_queue_setup(struct rte_eth_dev *dev,
+		   uint16_t tx_queue_id,
+		   uint16_t nb_tx_desc __rte_unused,
+		   unsigned int socket_id __rte_unused,
+		   const struct rte_eth_txconf *tx_conf __rte_unused)
+{
+	struct pmd_internals *pmd = dev->data->dev_private;
+
+	if (tx_queue_id >= pmd->nb_queues)
+		return -1;
+
+	if (tap_setup_queue(dev, pmd, tx_queue_id) == -1)
+		return -1;
+
+	RTE_LOG(INFO, PMD, "TX TAP device name %s, qid %d on fd %d\n",
+		dev->data->name, tx_queue_id, pmd->txq[tx_queue_id].fd);
+
+	return 0;
+}
+
+static const struct eth_dev_ops ops = {
+	.dev_start              = tap_dev_start,
+	.dev_stop               = tap_dev_stop,
+	.dev_close              = tap_dev_close,	/* empty callback */
+	.dev_configure          = tap_dev_configure,	/* always returns 0 */
+	.dev_infos_get          = tap_dev_info,
+	.rx_queue_setup         = tap_rx_queue_setup,
+	.tx_queue_setup         = tap_tx_queue_setup,
+	.rx_queue_release       = tap_rx_queue_release,
+	.tx_queue_release       = tap_tx_queue_release,
+	.link_update            = tap_link_update,	/* always returns 0 */
+	.stats_get              = tap_stats_get,
+	.stats_reset            = tap_stats_reset,
+};
+
+/* Program the MAC address of the TAP interface behind fd and store a second,
+ * different MAC address for the application side in addr. Both addresses end
+ * with the port id and numa node of dev and differ in their first four
+ * bytes.
+ *
+ * Returns 0 on success, -1 on bad arguments or when an ioctl fails.
+ */
+static int
+pmd_mac_address(int fd, struct rte_eth_dev *dev, struct ether_addr *addr)
+{
+	struct ifreq ifr;
+	char *mac = ifr.ifr_hwaddr.sa_data;
+
+	if (!dev || !addr || (fd <= 0))
+		return -1;
+
+	memset(&ifr, 0, sizeof(ifr));
+
+	if (ioctl(fd, SIOCGIFHWADDR, &ifr) == -1) {
+		RTE_LOG(ERR, PMD, "ioctl failed (SIOCGIFHWADDR) (%s)\n",
+			ifr.ifr_name);
+		return -1;
+	}
+
+	/* Set the host based MAC address to this special MAC format */
+	mac[0] = 'T';
+	mac[1] = 'a';
+	mac[2] = 'p';
+	mac[3] = '-';
+	mac[4] = dev->data->port_id;
+	mac[5] = dev->data->numa_node;
+	if (ioctl(fd, SIOCSIFHWADDR, &ifr) == -1) {
+		RTE_LOG(ERR, PMD, "%s: ioctl failed (SIOCSIFHWADDR) (%s)\n",
+			dev->data->name, ifr.ifr_name);
+		return -1;
+	}
+
+	/* Set the local application MAC address, which needs to be different
+	 * than the host based MAC address.
+	 */
+	mac[0] = 'd';
+	mac[1] = 'n';
+	mac[2] = 'e';
+	mac[3] = 't';
+	mac[4] = dev->data->port_id;
+	mac[5] = dev->data->numa_node;
+	rte_memcpy(addr, mac, ETH_ALEN);
+
+	return 0;
+}
+
+/* Create one TAP backed ethdev port.
+ *
+ * name is stored as the ethdev data name. tap_name is the host interface
+ * name handed to tun_alloc(), which may overwrite it with the name the host
+ * actually assigned. Only queue 0 receives a file descriptor here; every
+ * other queue slot is preset to -1.
+ *
+ * Returns 0 on success or -EINVAL on any failure.
+ */
+static int
+eth_dev_tap_create(const char *name, char *tap_name)
+{
+	int numa_node = rte_socket_id();
+	struct rte_eth_dev *dev = NULL;
+	struct pmd_internals *pmd = NULL;
+	struct rte_eth_dev_data *data = NULL;
+	int i, fd = -1;
+
+	RTE_LOG(INFO, PMD,
+		"%s: Create TAP Ethernet device with %d queues on numa %u\n",
+		 name, RTE_PMD_TAP_MAX_QUEUES, rte_socket_id());
+
+	data = rte_zmalloc_socket(tap_name, sizeof(*data), 0, numa_node);
+	if (!data) {
+		RTE_LOG(INFO, PMD, "Failed to allocate data\n");
+		goto error_exit;
+	}
+
+	pmd = rte_zmalloc_socket(tap_name, sizeof(*pmd), 0, numa_node);
+	if (!pmd) {
+		RTE_LOG(INFO, PMD, "Unable to allocate internal struct\n");
+		goto error_exit;
+	}
+
+	/* The port is allocated under tap_name; data->name is set to name
+	 * further down.
+	 */
+	dev = rte_eth_dev_allocate(tap_name);
+	if (!dev) {
+		RTE_LOG(INFO, PMD, "Unable to allocate device struct\n");
+		goto error_exit;
+	}
+
+	snprintf(pmd->name, sizeof(pmd->name), "%s", tap_name);
+
+	pmd->nb_queues = RTE_PMD_TAP_MAX_QUEUES;
+
+	/* Setup some default values */
+	data->dev_private = pmd;
+	data->port_id = dev->data->port_id;
+	data->dev_flags = RTE_ETH_DEV_DETACHABLE;
+	data->kdrv = RTE_KDRV_NONE;
+	data->drv_name = drivername;
+	data->numa_node = numa_node;
+
+	data->dev_link = pmd_link;
+	data->mac_addrs = &pmd->eth_addr;
+	data->nb_rx_queues = pmd->nb_queues;
+	data->nb_tx_queues = pmd->nb_queues;
+
+	dev->data = data;
+	dev->dev_ops = &ops;
+	dev->driver = NULL;
+	dev->rx_pkt_burst = pmd_rx_burst;
+	dev->tx_pkt_burst = pmd_tx_burst;
+	snprintf(dev->data->name, sizeof(dev->data->name), "%s", name);
+
+	/* Create the first Tap device */
+	fd = tun_alloc(tap_name);
+	if (fd < 0) {
+		RTE_LOG(INFO, PMD, "tun_alloc() failed\n");
+		goto error_exit;
+	}
+
+	/* Presetup the fds to -1 as being not working */
+	for (i = 0; i < RTE_PMD_TAP_MAX_QUEUES; i++) {
+		pmd->fds[i] = -1;
+		pmd->rxq[i].fd = -1;
+		pmd->txq[i].fd = -1;
+	}
+
+	/* Take the TUN/TAP fd and place in the first location */
+	pmd->rxq[0].fd = fd;
+	pmd->txq[0].fd = fd;
+	pmd->fds[0] = fd;
+
+	if (pmd_mac_address(fd, dev, &pmd->eth_addr) < 0) {
+		RTE_LOG(INFO, PMD, "Unable to get MAC address\n");
+		goto error_exit;
+	}
+
+	return 0;
+
+error_exit:
+	RTE_PMD_DEBUG_TRACE("Unable to initialize %s\n", name);
+
+	/* Do not leak the TAP descriptor when a step after tun_alloc() fails;
+	 * fd is still -1 if it was never opened.
+	 */
+	if (fd >= 0)
+		close(fd);
+
+	rte_free(data);
+	rte_free(pmd);
+
+	rte_eth_dev_release_port(dev);
+
+	return -EINVAL;
+}
+
+/* kvargs handler for the "iface" argument: copy value into the name buffer
+ * passed through extra_args, or build the default name from the current
+ * unit number when no value is supplied. Always returns 0.
+ */
+static int
+set_interface_name(const char *key __rte_unused,
+		   const char *value,
+		   void *extra_args)
+{
+	char *name = extra_args;
+
+	if (!value) {
+		snprintf(name, RTE_ETH_NAME_MAX_LEN - 1, "%s%d",
+			 DEFAULT_TAP_NAME, (tap_unit - 1));
+		return 0;
+	}
+
+	snprintf(name, RTE_ETH_NAME_MAX_LEN - 1, "%s", value);
+
+	return 0;
+}
+
+/* kvargs handler for the "speed" argument: store the integer parsed from
+ * value in the int passed through extra_args, or the 10G default when no
+ * value is supplied. Always returns 0.
+ */
+static int
+set_interface_speed(const char *key __rte_unused,
+		    const char *value,
+		    void *extra_args)
+{
+	int *speed = extra_args;
+
+	if (value)
+		*speed = atoi(value);
+	else
+		*speed = ETH_SPEED_NUM_10G;
+
+	return 0;
+}
+
+/* Open a TAP interface device.
+ *
+ * Builds the default interface name from the next unit number, applies the
+ * optional "iface" and "speed" arguments from params, and creates the port.
+ * On failure the unit number is handed back.
+ *
+ * Returns 0 on success or a negative value on failure.
+ */
+static int
+rte_pmd_tap_probe(const char *name, const char *params)
+{
+	int ret;
+	struct rte_kvargs *kvlist = NULL;
+	int speed;
+	char tap_name[RTE_ETH_NAME_MAX_LEN];
+
+	speed = ETH_SPEED_NUM_10G;
+	snprintf(tap_name, sizeof(tap_name), "%s%d",
+		 DEFAULT_TAP_NAME, tap_unit++);
+
+	RTE_LOG(INFO, PMD, "Initializing pmd_tap for %s as %s\n",
+		name, tap_name);
+
+	if (params && (params[0] != '\0')) {
+		RTE_LOG(INFO, PMD, "paramaters (%s)\n", params);
+
+		kvlist = rte_kvargs_parse(params, valid_arguments);
+		if (kvlist) {
+			if (rte_kvargs_count(kvlist, ETH_TAP_SPEED_ARG) == 1) {
+				ret = rte_kvargs_process(kvlist,
+							 ETH_TAP_SPEED_ARG,
+							 &set_interface_speed,
+							 &speed);
+				if (ret < 0)
+					goto leave;
+			}
+
+			if (rte_kvargs_count(kvlist, ETH_TAP_IFACE_ARG) == 1) {
+				ret = rte_kvargs_process(kvlist,
+							 ETH_TAP_IFACE_ARG,
+							 &set_interface_name,
+							 tap_name);
+				if (ret < 0)
+					goto leave;
+			}
+		}
+	}
+	pmd_link.link_speed = speed;
+
+	ret = eth_dev_tap_create(name, tap_name);
+
+leave:
+	/* eth_dev_tap_create() fails with -EINVAL rather than -1, so treat
+	 * every negative value as a failure.
+	 */
+	if (ret < 0) {
+		RTE_LOG(INFO, PMD, "Failed to create pmd for %s as %s\n",
+			name, tap_name);
+		tap_unit--;		/* Restore the unit number */
+	}
+	rte_kvargs_free(kvlist);
+
+	return ret;
+}
+
+/* Detach a TAP device: close the file descriptors recorded for its queues,
+ * free the private and ethdev data, and release the port. Returns 0 even
+ * when no port named name exists.
+ */
+static int
+rte_pmd_tap_remove(const char *name)
+{
+	struct rte_eth_dev *port;
+	struct pmd_internals *pmd;
+	int q;
+
+	RTE_LOG(INFO, PMD, "Closing TUN/TAP Ethernet device on numa %u\n",
+		rte_socket_id());
+
+	/* find the ethdev entry */
+	port = rte_eth_dev_allocated(name);
+	if (port == NULL)
+		return 0;
+
+	pmd = port->data->dev_private;
+	for (q = 0; q < pmd->nb_queues; q++) {
+		if (pmd->fds[q] == -1)
+			continue;
+		close(pmd->fds[q]);
+	}
+
+	rte_free(port->data->dev_private);
+	rte_free(port->data);
+
+	rte_eth_dev_release_port(port);
+
+	return 0;
+}
+
+static struct rte_vdev_driver pmd_tap_drv = {
+	.probe = rte_pmd_tap_probe,
+	.remove = rte_pmd_tap_remove,
+};
+RTE_PMD_REGISTER_VDEV(net_tap, pmd_tap_drv);
+RTE_PMD_REGISTER_PARAM_STRING(net_tap, "iface=<string>,speed=N"); /* keys match valid_arguments */
diff --git a/drivers/net/tap/rte_pmd_tap_version.map b/drivers/net/tap/rte_pmd_tap_version.map
new file mode 100644
index 0000000..61463bf
--- /dev/null
+++ b/drivers/net/tap/rte_pmd_tap_version.map
@@ -0,0 +1,4 @@
+DPDK_16.11 {
+
+	local: *;
+};
diff --git a/mk/rte.app.mk b/mk/rte.app.mk
index f75f0e2..02c32ae 100644
--- a/mk/rte.app.mk
+++ b/mk/rte.app.mk
@@ -124,6 +124,7 @@ _LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_PCAP)       += -lrte_pmd_pcap -lpcap
 _LDLIBS-$(CONFIG_RTE_LIBRTE_QEDE_PMD)       += -lrte_pmd_qede
 _LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_RING)       += -lrte_pmd_ring
 _LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_SZEDATA2)   += -lrte_pmd_szedata2 -lsze2
+_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_TAP)        += -lrte_pmd_tap
 _LDLIBS-$(CONFIG_RTE_LIBRTE_THUNDERX_NICVF_PMD) += -lrte_pmd_thunderx_nicvf -lm
 _LDLIBS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD)     += -lrte_pmd_virtio
 ifeq ($(CONFIG_RTE_LIBRTE_VHOST),y)
-- 
2.8.0.GIT

^ permalink raw reply related	[flat|nested] 59+ messages in thread

* [PATCH v12] net/tap: new TUN/TAP device PMD
  2016-10-13 22:03     ` [PATCH v9] " Keith Wiles
                         ` (5 preceding siblings ...)
  2016-12-12 14:24       ` [PATCH v12] net/tap: new " Keith Wiles
@ 2016-12-12 14:38       ` Keith Wiles
  2016-12-12 19:13         ` Marc
  2016-12-13 13:54         ` Ferruh Yigit
  6 siblings, 2 replies; 59+ messages in thread
From: Keith Wiles @ 2016-12-12 14:38 UTC (permalink / raw)
  To: dev; +Cc: pmatilai, yuanhan.liu, ferruh.yigit, john.mcnamara

The PMD allows for DPDK and the host to communicate using a raw
device interface on the host and in the DPDK application. The device
created is a Tap device with a L2 packet header.

v12- Fixup minor changes for driver_name and version number
v11- Add the tap.rst to the nic/index.rst file
v10- Change the string name used to allow for multiple devices.
v9 - Fix up the docs to use correct syntax
v8 - Fix issue with tap_tx_queue_setup() not return zero on success.
v7 - Reword the comment in common_base and fix the data->name issue
v6 - fixed the checkpatch issues
v5 - merge in changes from list review see related emails
     fixed many minor edits
v4 - merge with latest driver changes
v3 - fix includes by removing ifdef for other type besides Linux
     Fix the copyright notice in the Makefile
v2 - merge all of the patches into one patch
     Fix a typo on naming the tap device
     Update the maintainers list

Signed-off-by: Keith Wiles <keith.wiles@intel.com>
---
 MAINTAINERS                             |   5 +
 config/common_base                      |   9 +
 config/common_linuxapp                  |   1 +
 doc/guides/nics/index.rst               |   1 +
 doc/guides/nics/tap.rst                 | 136 ++++++
 drivers/net/Makefile                    |   1 +
 drivers/net/tap/Makefile                |  57 +++
 drivers/net/tap/rte_eth_tap.c           | 765 ++++++++++++++++++++++++++++++++
 drivers/net/tap/rte_pmd_tap_version.map |   4 +
 mk/rte.app.mk                           |   1 +
 10 files changed, 980 insertions(+)
 create mode 100644 doc/guides/nics/tap.rst
 create mode 100644 drivers/net/tap/Makefile
 create mode 100644 drivers/net/tap/rte_eth_tap.c
 create mode 100644 drivers/net/tap/rte_pmd_tap_version.map

diff --git a/MAINTAINERS b/MAINTAINERS
index 26d9590..842fb6d 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -398,6 +398,11 @@ F: doc/guides/nics/pcap_ring.rst
 F: app/test/test_pmd_ring.c
 F: app/test/test_pmd_ring_perf.c
 
+Tap PMD
+M: Keith Wiles <keith.wiles@intel.com>
+F: drivers/net/tap
+F: doc/guides/nics/tap.rst
+
 Null Networking PMD
 M: Tetsuya Mukawa <mtetsuyah@gmail.com>
 F: drivers/net/null/
diff --git a/config/common_base b/config/common_base
index 652a839..eb51cdb 100644
--- a/config/common_base
+++ b/config/common_base
@@ -590,3 +590,12 @@ CONFIG_RTE_APP_TEST_RESOURCE_TAR=n
 CONFIG_RTE_TEST_PMD=y
 CONFIG_RTE_TEST_PMD_RECORD_CORE_CYCLES=n
 CONFIG_RTE_TEST_PMD_RECORD_BURST_STATS=n
+
+#
+# Compile the TAP PMD
+#
+# The TAP PMD is currently only built for Linux and the
+# option is enabled by default in common_linuxapp file,
+# set to 'n' in the common_base file.
+#
+CONFIG_RTE_LIBRTE_PMD_TAP=n
diff --git a/config/common_linuxapp b/config/common_linuxapp
index 2483dfa..782b503 100644
--- a/config/common_linuxapp
+++ b/config/common_linuxapp
@@ -44,3 +44,4 @@ CONFIG_RTE_LIBRTE_PMD_VHOST=y
 CONFIG_RTE_LIBRTE_PMD_AF_PACKET=y
 CONFIG_RTE_LIBRTE_POWER=y
 CONFIG_RTE_VIRTIO_USER=y
+CONFIG_RTE_LIBRTE_PMD_TAP=y
diff --git a/doc/guides/nics/index.rst b/doc/guides/nics/index.rst
index 92d56a5..af92529 100644
--- a/doc/guides/nics/index.rst
+++ b/doc/guides/nics/index.rst
@@ -51,6 +51,7 @@ Network Interface Controller Drivers
     nfp
     qede
     szedata2
+    tap
     thunderx
     virtio
     vhost
diff --git a/doc/guides/nics/tap.rst b/doc/guides/nics/tap.rst
new file mode 100644
index 0000000..622b9e7
--- /dev/null
+++ b/doc/guides/nics/tap.rst
@@ -0,0 +1,136 @@
+..  BSD LICENSE
+    Copyright(c) 2016 Intel Corporation. All rights reserved.
+    All rights reserved.
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions
+    are met:
+
+    * Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+    notice, this list of conditions and the following disclaimer in
+    the documentation and/or other materials provided with the
+    distribution.
+    * Neither the name of Intel Corporation nor the names of its
+    contributors may be used to endorse or promote products derived
+    from this software without specific prior written permission.
+
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+Tun/Tap Poll Mode Driver
+========================
+
+The ``rte_eth_tap.c`` PMD creates a device using TUN/TAP interfaces on the
+local host. The PMD allows for DPDK and the host to communicate using a raw
+device interface on the host and in the DPDK application.
+
+The device created is a TAP device, which sends/receives packets in a raw
+format with an L2 header. The TAP PMD is used for connectivity to the
+local host using a TAP interface. When the TAP PMD is initialized it will
+create a number of tap devices in the host, visible via the ``ifconfig -a``
+or ``ip`` commands. These commands can be used to configure and query the
+virtual device.
+
+These TAP interfaces can be used with Wireshark or tcpdump or Pktgen-DPDK
+along with being able to be used as a network connection to the DPDK
+application. The method to enable one or more interfaces is to use the
+``--vdev=net_tap`` option on the DPDK application command line. Each
+``--vdev=net_tap`` option given will create an interface named dtap0, dtap1,
+and so on.
+
+The interface name can be changed by adding ``iface=foo0``, for example::
+
+   --vdev=net_tap,iface=foo0 --vdev=net_tap,iface=foo1, ...
+
+Also the speed of the interface can be changed from 10G to whatever number
+needed, but the interface does not enforce that speed, for example::
+
+   --vdev=net_tap,iface=foo0,speed=25000
+
+After the DPDK application is started you can send and receive packets on the
+interface using the standard rx_burst/tx_burst APIs in DPDK. From the host
+point of view you can use any host tool like tcpdump, Wireshark, ping, Pktgen
+and others to communicate with the DPDK application. The DPDK application may
+not understand network protocols like IPv4/6, UDP or TCP unless the
+application has been written to understand these protocols.
+
+If you need the interface to act as a real network interface, meaning it is
+up and has a valid IP address, you can do this with the following commands::
+
+   sudo ip link set dtap0 up; sudo ip addr add 192.168.0.250/24 dev dtap0
+   sudo ip link set dtap1 up; sudo ip addr add 192.168.1.250/24 dev dtap1
+
+Please change the IP addresses as you see fit.
+
+If routing is enabled on the host you can also communicate with the DPDK App
+over the internet via a standard socket layer application as long as you
+account for the protocol handling in the application.
+
+If you have a Network Stack in your DPDK application or something like it you
+can utilize that stack to handle the network protocols. Plus you would be able
+to address the interface using an IP address assigned to the internal
+interface.
+
+Example
+-------
+
+The following is a simple example of using the TUN/TAP PMD with the Pktgen
+packet generator. It requires that the ``socat`` utility is installed on the
+test system.
+
+Build DPDK, then pull down Pktgen and build pktgen using the DPDK SDK/Target
+used to build the dpdk you pulled down.
+
+Run pktgen from the pktgen directory in a terminal with a commandline like the
+following::
+
+    sudo ./app/app/x86_64-native-linuxapp-gcc/app/pktgen -l 1-5 -n 4        \
+     --proc-type auto --log-level 8 --socket-mem 512,512 --file-prefix pg   \
+     --vdev=net_tap --vdev=net_tap -b 05:00.0 -b 05:00.1                    \
+     -b 04:00.0 -b 04:00.1 -b 04:00.2 -b 04:00.3                            \
+     -b 81:00.0 -b 81:00.1 -b 81:00.2 -b 81:00.3                            \
+     -b 82:00.0 -b 83:00.0 -- -T -P -m [2:3].0 -m [4:5].1                   \
+     -f themes/black-yellow.theme
+
+.. Note::
+
+   Change the ``-b`` options to blacklist all of your physical ports. The
+   command line above is all one line.
+
+   Also, ``-f themes/black-yellow.theme`` is optional if the default colors
+   work on your system configuration. See the Pktgen docs for more
+   information.
+
+Verify with the ``ifconfig -a`` command in a different xterm window; the
+``dtap0`` and ``dtap1`` interfaces should have been created.
+
+Next set the links for the two interfaces to up via the commands below::
+
+    sudo ip link set dtap0 up; sudo ip addr add 192.168.0.250/24 dev dtap0
+    sudo ip link set dtap1 up; sudo ip addr add 192.168.1.250/24 dev dtap1
+
+Then use socat to create a loopback for the two interfaces::
+
+    sudo socat interface:dtap0 interface:dtap1
+
+Then on the Pktgen command line interface you can start sending packets using
+the commands ``start 0`` and ``start 1`` or you can start both at the same
+time with ``start all``. The command ``str`` is an alias for ``start all`` and
+``stp`` is an alias for ``stop all``.
+
+While running you should see the 64 byte counters increasing to verify the
+traffic is being looped back. You can use ``set all size XXX`` to change the
+size of the packets after you stop the traffic. Use the pktgen ``help``
+command to see a list of all commands. You can also use the ``-f`` option to
+load commands at startup.
diff --git a/drivers/net/Makefile b/drivers/net/Makefile
index bc93230..e366a85 100644
--- a/drivers/net/Makefile
+++ b/drivers/net/Makefile
@@ -51,6 +51,7 @@ DIRS-$(CONFIG_RTE_LIBRTE_PMD_PCAP) += pcap
 DIRS-$(CONFIG_RTE_LIBRTE_QEDE_PMD) += qede
 DIRS-$(CONFIG_RTE_LIBRTE_PMD_RING) += ring
 DIRS-$(CONFIG_RTE_LIBRTE_PMD_SZEDATA2) += szedata2
+DIRS-$(CONFIG_RTE_LIBRTE_PMD_TAP) += tap
 DIRS-$(CONFIG_RTE_LIBRTE_THUNDERX_NICVF_PMD) += thunderx
 DIRS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio
 DIRS-$(CONFIG_RTE_LIBRTE_VMXNET3_PMD) += vmxnet3
diff --git a/drivers/net/tap/Makefile b/drivers/net/tap/Makefile
new file mode 100644
index 0000000..e18f30c
--- /dev/null
+++ b/drivers/net/tap/Makefile
@@ -0,0 +1,57 @@
+#   BSD LICENSE
+#
+#   Copyright(c) 2016 Intel Corporation. All rights reserved.
+#
+#   Redistribution and use in source and binary forms, with or without
+#   modification, are permitted provided that the following conditions
+#   are met:
+#
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice, this list of conditions and the following disclaimer in
+#       the documentation and/or other materials provided with the
+#       distribution.
+#     * Neither the name of Intel Corporation nor the names of its
+#       contributors may be used to endorse or promote products derived
+#       from this software without specific prior written permission.
+#
+#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+#   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+#   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+#   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+#   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+#
+# library name
+#
+LIB = librte_pmd_tap.a
+
+EXPORT_MAP := rte_pmd_tap_version.map
+
+LIBABIVER := 1
+
+CFLAGS += -O3
+CFLAGS += $(WERROR_FLAGS)
+
+#
+# all source are stored in SRCS-y
+#
+SRCS-$(CONFIG_RTE_LIBRTE_PMD_TAP) += rte_eth_tap.c
+
+# this lib depends upon:
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_TAP) += lib/librte_eal
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_TAP) += lib/librte_mbuf
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_TAP) += lib/librte_mempool
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_TAP) += lib/librte_ether
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_TAP) += lib/librte_kvargs
+
+include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/drivers/net/tap/rte_eth_tap.c b/drivers/net/tap/rte_eth_tap.c
new file mode 100644
index 0000000..976f2d9
--- /dev/null
+++ b/drivers/net/tap/rte_eth_tap.c
@@ -0,0 +1,765 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2016 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <rte_mbuf.h>
+#include <rte_ethdev.h>
+#include <rte_malloc.h>
+#include <rte_vdev.h>
+#include <rte_kvargs.h>
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/socket.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <unistd.h>
+#include <poll.h>
+#include <arpa/inet.h>
+#include <linux/if.h>
+#include <linux/if_tun.h>
+#include <linux/if_ether.h>
+#include <fcntl.h>
+
+/* Linux based path to the TUN device */
+#define TUN_TAP_DEV_PATH        "/dev/net/tun"
+#define DEFAULT_TAP_NAME        "dtap"
+
+#define ETH_TAP_IFACE_ARG       "iface"
+#define ETH_TAP_SPEED_ARG       "speed"
+
+#define RTE_PMD_TAP_MAX_QUEUES	16
+
+static struct rte_vdev_driver pmd_tap_drv;
+
+static const char *valid_arguments[] = {
+	ETH_TAP_IFACE_ARG,
+	ETH_TAP_SPEED_ARG,
+	NULL
+};
+
+static int tap_unit;
+
+static struct rte_eth_link pmd_link = {
+	.link_speed = ETH_SPEED_NUM_10G,
+	.link_duplex = ETH_LINK_FULL_DUPLEX,
+	.link_status = ETH_LINK_DOWN,
+	.link_autoneg = ETH_LINK_SPEED_AUTONEG
+};
+
+struct pkt_stats {
+	uint64_t opackets;		/* Number of output packets */
+	uint64_t ipackets;		/* Number of input packets */
+	uint64_t obytes;		/* Number of bytes on output */
+	uint64_t ibytes;		/* Number of bytes on input */
+	uint64_t errs;			/* Number of error packets */
+};
+
+struct rx_queue {
+	struct rte_mempool *mp;		/* Mempool for RX packets */
+	uint16_t in_port;		/* Port ID */
+	int fd;
+
+	struct pkt_stats stats;		/* Stats for this RX queue */
+};
+
+struct tx_queue {
+	int fd;
+	struct pkt_stats stats;		/* Stats for this TX queue */
+};
+
+struct pmd_internals {
+	char name[RTE_ETH_NAME_MAX_LEN];	/* Internal Tap device name */
+	uint16_t nb_queues;		/* Number of queues supported */
+	struct ether_addr eth_addr;	/* Mac address of the device port */
+
+	int if_index;			/* IF_INDEX for the port */
+	int fds[RTE_PMD_TAP_MAX_QUEUES]; /* List of all file descriptors */
+
+	struct rx_queue rxq[RTE_PMD_TAP_MAX_QUEUES];	/* List of RX queues */
+	struct tx_queue txq[RTE_PMD_TAP_MAX_QUEUES];	/* List of TX queues */
+};
+
+/* Tun/Tap allocation routine
+ *
+ * name is the number of the interface to use, unless NULL to take the host
+ * supplied name.
+ */
+static int
+tun_alloc(char *name)
+{
+	struct ifreq ifr;
+	unsigned int features;
+	int fd;
+
+	memset(&ifr, 0, sizeof(struct ifreq));
+
+	ifr.ifr_flags = IFF_TAP | IFF_NO_PI;
+	if (name && name[0])
+		strncpy(ifr.ifr_name, name, IFNAMSIZ);
+
+	fd = open(TUN_TAP_DEV_PATH, O_RDWR);
+	if (fd < 0) {
+		RTE_LOG(ERR, PMD, "Unable to create TAP interface");
+		goto error;
+	}
+
+	/* Grab the TUN features to verify we can work */
+	if (ioctl(fd, TUNGETFEATURES, &features) < 0) {
+		RTE_LOG(ERR, PMD, "Unable to get TUN/TAP features\n");
+		goto error;
+	}
+	RTE_LOG(DEBUG, PMD, "TUN/TAP Features %08x\n", features);
+
+	if (!(features & IFF_MULTI_QUEUE) && (RTE_PMD_TAP_MAX_QUEUES > 1)) {
+		RTE_LOG(DEBUG, PMD, "TUN/TAP device only one queue\n");
+		goto error;
+	} else if ((features & IFF_ONE_QUEUE) &&
+			(RTE_PMD_TAP_MAX_QUEUES == 1)) {
+		ifr.ifr_flags |= IFF_ONE_QUEUE;
+		RTE_LOG(DEBUG, PMD, "Single queue only support\n");
+	} else {
+		ifr.ifr_flags |= IFF_MULTI_QUEUE;
+		RTE_LOG(DEBUG, PMD, "Multi-queue support for %d queues\n",
+			RTE_PMD_TAP_MAX_QUEUES);
+	}
+
+	/* Set the TUN/TAP configuration and get the name if needed */
+	if (ioctl(fd, TUNSETIFF, (void *)&ifr) < 0) {
+		RTE_LOG(ERR, PMD, "Unable to set TUNSETIFF for %s\n",
+			ifr.ifr_name);
+		perror("TUNSETIFF");
+		goto error;
+	}
+
+	/* Always set the file descriptor to non-blocking */
+	if (fcntl(fd, F_SETFL, O_NONBLOCK) < 0) {
+		RTE_LOG(ERR, PMD, "Unable to set to nonblocking\n");
+		perror("F_SETFL, NONBLOCK");
+		goto error;
+	}
+
+	/* If the name is different that new name as default */
+	if (name && strcmp(name, ifr.ifr_name))
+		snprintf(name, RTE_ETH_NAME_MAX_LEN - 1, "%s", ifr.ifr_name);
+
+	return fd;
+
+error:
+	if (fd > 0)
+		close(fd);
+	return -1;
+}
+
+/* Callback to handle the rx burst of packets to the correct interface and
+ * file descriptor(s) in a multi-queue setup.
+ */
+static uint16_t
+pmd_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
+{
+	int len;
+	struct rte_mbuf *mbuf;
+	struct rx_queue *rxq = queue;
+	uint16_t num_rx;
+	unsigned long num_rx_bytes = 0;
+
+	for (num_rx = 0; num_rx < nb_pkts; ) {
+		/* allocate the next mbuf */
+		mbuf = rte_pktmbuf_alloc(rxq->mp);
+		if (unlikely(!mbuf)) {
+			RTE_LOG(WARNING, PMD, "Unable to allocate mbuf\n");
+			break;
+		}
+
+		len = read(rxq->fd, rte_pktmbuf_mtod(mbuf, char *),
+			   rte_pktmbuf_tailroom(mbuf));
+		if (len <= 0) {
+			rte_pktmbuf_free(mbuf);
+			break;
+		}
+
+		mbuf->data_len = len;
+		mbuf->pkt_len = len;
+		mbuf->port = rxq->in_port;
+
+		/* account for the receive frame */
+		bufs[num_rx++] = mbuf;
+		num_rx_bytes += mbuf->pkt_len;
+	}
+	rxq->stats.ipackets += num_rx;
+	rxq->stats.ibytes += num_rx_bytes;
+
+	return num_rx;
+}
+
+/* Callback to handle sending packets from the tap interface
+ */
+static uint16_t
+pmd_tx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
+{
+	struct rte_mbuf *mbuf;
+	struct tx_queue *txq = queue;
+	struct pollfd pfd;
+	uint16_t num_tx = 0;
+	unsigned long num_tx_bytes = 0;
+	int i, n;
+
+	if (unlikely(nb_pkts == 0))
+		return 0;
+
+	pfd.events = POLLOUT;
+	pfd.fd = txq->fd;
+	for (i = 0; i < nb_pkts; i++) {
+		n = poll(&pfd, 1, 0);
+
+		if (n <= 0)
+			break;
+
+		if (pfd.revents & POLLOUT) {
+			/* copy the tx frame data */
+			mbuf = bufs[num_tx];
+			n = write(pfd.fd, rte_pktmbuf_mtod(mbuf, void*),
+				  rte_pktmbuf_pkt_len(mbuf));
+			if (n <= 0)
+				break;
+
+			num_tx++;
+			num_tx_bytes += mbuf->pkt_len;
+			rte_pktmbuf_free(mbuf);
+		}
+	}
+
+	txq->stats.opackets += num_tx;
+	txq->stats.errs += nb_pkts - num_tx;
+	txq->stats.obytes += num_tx_bytes;
+
+	return num_tx;
+}
+
+static int
+tap_dev_start(struct rte_eth_dev *dev)
+{
+	/* Force the Link up */
+	dev->data->dev_link.link_status = ETH_LINK_UP;
+
+	return 0;
+}
+
+/* This function gets called when the current port gets stopped.
+ */
+static void
+tap_dev_stop(struct rte_eth_dev *dev)
+{
+	int i;
+	struct pmd_internals *internals = dev->data->dev_private;
+
+	for (i = 0; i < internals->nb_queues; i++)
+		if (internals->fds[i] != -1)
+			close(internals->fds[i]);
+
+	dev->data->dev_link.link_status = ETH_LINK_DOWN;
+}
+
+static int
+tap_dev_configure(struct rte_eth_dev *dev __rte_unused)
+{
+	return 0;
+}
+
+static void
+tap_dev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
+{
+	struct pmd_internals *internals = dev->data->dev_private;
+
+	dev_info->if_index = internals->if_index;
+	dev_info->max_mac_addrs = 1;
+	dev_info->max_rx_pktlen = (uint32_t)ETHER_MAX_VLAN_FRAME_LEN;
+	dev_info->max_rx_queues = internals->nb_queues;
+	dev_info->max_tx_queues = internals->nb_queues;
+	dev_info->min_rx_bufsize = 0;
+	dev_info->pci_dev = NULL;
+}
+
+static void
+tap_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *tap_stats)
+{
+	unsigned int i, imax;
+	unsigned long rx_total = 0, tx_total = 0, tx_err_total = 0;
+	unsigned long rx_bytes_total = 0, tx_bytes_total = 0;
+	const struct pmd_internals *pmd = dev->data->dev_private;
+
+	imax = (pmd->nb_queues < RTE_ETHDEV_QUEUE_STAT_CNTRS) ?
+		pmd->nb_queues : RTE_ETHDEV_QUEUE_STAT_CNTRS;
+
+	for (i = 0; i < imax; i++) {
+		tap_stats->q_ipackets[i] = pmd->rxq[i].stats.ipackets;
+		tap_stats->q_ibytes[i] = pmd->rxq[i].stats.ibytes;
+		rx_total += tap_stats->q_ipackets[i];
+		rx_bytes_total += tap_stats->q_ibytes[i];
+	}
+
+	for (i = 0; i < imax; i++) {
+		tap_stats->q_opackets[i] = pmd->txq[i].stats.opackets;
+		tap_stats->q_errors[i] = pmd->txq[i].stats.errs;
+		tap_stats->q_obytes[i] = pmd->txq[i].stats.obytes;
+		tx_total += tap_stats->q_opackets[i];
+		tx_err_total += tap_stats->q_errors[i];
+		tx_bytes_total += tap_stats->q_obytes[i];
+	}
+
+	tap_stats->ipackets = rx_total;
+	tap_stats->ibytes = rx_bytes_total;
+	tap_stats->opackets = tx_total;
+	tap_stats->oerrors = tx_err_total;
+	tap_stats->obytes = tx_bytes_total;
+}
+
+static void
+tap_stats_reset(struct rte_eth_dev *dev)
+{
+	int i;
+	struct pmd_internals *pmd = dev->data->dev_private;
+
+	for (i = 0; i < pmd->nb_queues; i++) {
+		pmd->rxq[i].stats.ipackets = 0;
+		pmd->rxq[i].stats.ibytes = 0;
+	}
+
+	for (i = 0; i < pmd->nb_queues; i++) {
+		pmd->txq[i].stats.opackets = 0;
+		pmd->txq[i].stats.errs = 0;
+		pmd->txq[i].stats.obytes = 0;
+	}
+}
+
+static void
+tap_dev_close(struct rte_eth_dev *dev __rte_unused)
+{
+}
+
+static void
+tap_rx_queue_release(void *queue)
+{
+	struct rx_queue *rxq = queue;
+
+	if (rxq && (rxq->fd > 0)) {
+		close(rxq->fd);
+		rxq->fd = -1;
+	}
+}
+
+static void
+tap_tx_queue_release(void *queue)
+{
+	struct tx_queue *txq = queue;
+
+	if (txq && (txq->fd > 0)) {
+		close(txq->fd);
+		txq->fd = -1;
+	}
+}
+
+static int
+tap_link_update(struct rte_eth_dev *dev __rte_unused,
+		int wait_to_complete __rte_unused)
+{
+	return 0;
+}
+
+static int
+tap_setup_queue(struct rte_eth_dev *dev,
+		struct pmd_internals *internals,
+		uint16_t qid)
+{
+	struct rx_queue *rx = &internals->rxq[qid];
+	struct tx_queue *tx = &internals->txq[qid];
+	int fd;
+
+	fd = rx->fd;
+	if (fd < 0) {
+		fd = tx->fd;
+		if (fd < 0) {
+			RTE_LOG(INFO, PMD, "Add queue to TAP %s for qid %d\n",
+				dev->data->name, qid);
+			fd = tun_alloc(dev->data->name);
+			if (fd < 0) {
+				RTE_LOG(ERR, PMD, "tun_alloc(%s) failed\n",
+					dev->data->name);
+				return -1;
+			}
+		}
+	}
+	dev->data->rx_queues[qid] = rx;
+	dev->data->tx_queues[qid] = tx;
+
+	rx->fd = fd;
+	tx->fd = fd;
+
+	return fd;
+}
+
+static int
+tap_rx_queue_setup(struct rte_eth_dev *dev,
+		   uint16_t rx_queue_id,
+		   uint16_t nb_rx_desc __rte_unused,
+		   unsigned int socket_id __rte_unused,
+		   const struct rte_eth_rxconf *rx_conf __rte_unused,
+		   struct rte_mempool *mp)
+{
+	struct pmd_internals *internals = dev->data->dev_private;
+	uint16_t buf_size;
+	int fd;
+
+	if ((rx_queue_id >= internals->nb_queues) || !mp) {
+		RTE_LOG(ERR, PMD, "nb_queues %d mp %p\n",
+			internals->nb_queues, mp);
+		return -1;
+	}
+
+	internals->rxq[rx_queue_id].mp = mp;
+	internals->rxq[rx_queue_id].in_port = dev->data->port_id;
+
+	/* Now get the space available for data in the mbuf */
+	buf_size = (uint16_t)(rte_pktmbuf_data_room_size(mp) -
+				RTE_PKTMBUF_HEADROOM);
+
+	if (buf_size < ETH_FRAME_LEN) {
+		RTE_LOG(ERR, PMD,
+			"%s: %d bytes will not fit in mbuf (%d bytes)\n",
+			dev->data->name, ETH_FRAME_LEN, buf_size);
+		return -ENOMEM;
+	}
+
+	fd = tap_setup_queue(dev, internals, rx_queue_id);
+	if (fd == -1)
+		return -1;
+
+	internals->fds[rx_queue_id] = fd;
+	RTE_LOG(INFO, PMD, "RX TAP device name %s, qid %d on fd %d\n",
+		dev->data->name, rx_queue_id, internals->rxq[rx_queue_id].fd);
+
+	return 0;
+}
+
+static int
+tap_tx_queue_setup(struct rte_eth_dev *dev,
+		   uint16_t tx_queue_id,
+		   uint16_t nb_tx_desc __rte_unused,
+		   unsigned int socket_id __rte_unused,
+		   const struct rte_eth_txconf *tx_conf __rte_unused)
+{
+	struct pmd_internals *internals = dev->data->dev_private;
+	int ret;
+
+	if (tx_queue_id >= internals->nb_queues)
+		return -1;
+
+	ret = tap_setup_queue(dev, internals, tx_queue_id);
+	if (ret == -1)
+		return -1;
+
+	RTE_LOG(INFO, PMD, "TX TAP device name %s, qid %d on fd %d\n",
+		dev->data->name, tx_queue_id, internals->txq[tx_queue_id].fd);
+
+	return 0;
+}
+
+static const struct eth_dev_ops ops = {
+	.dev_start              = tap_dev_start,
+	.dev_stop               = tap_dev_stop,
+	.dev_close              = tap_dev_close,
+	.dev_configure          = tap_dev_configure,
+	.dev_infos_get          = tap_dev_info,
+	.rx_queue_setup         = tap_rx_queue_setup,
+	.tx_queue_setup         = tap_tx_queue_setup,
+	.rx_queue_release       = tap_rx_queue_release,
+	.tx_queue_release       = tap_tx_queue_release,
+	.link_update            = tap_link_update,
+	.stats_get              = tap_stats_get,
+	.stats_reset            = tap_stats_reset,
+};
+
+static int
+pmd_mac_address(int fd, struct rte_eth_dev *dev, struct ether_addr *addr)
+{
+	struct ifreq ifr;
+
+	if ((fd <= 0) || !dev || !addr)
+		return -1;
+
+	memset(&ifr, 0, sizeof(ifr));
+
+	if (ioctl(fd, SIOCGIFHWADDR, &ifr) == -1) {
+		RTE_LOG(ERR, PMD, "ioctl failed (SIOCGIFHWADDR) (%s)\n",
+			ifr.ifr_name);
+		return -1;
+	}
+
+	/* Set the host based MAC address to this special MAC format */
+	ifr.ifr_hwaddr.sa_data[0] = 'T';
+	ifr.ifr_hwaddr.sa_data[1] = 'a';
+	ifr.ifr_hwaddr.sa_data[2] = 'p';
+	ifr.ifr_hwaddr.sa_data[3] = '-';
+	ifr.ifr_hwaddr.sa_data[4] = dev->data->port_id;
+	ifr.ifr_hwaddr.sa_data[5] = dev->data->numa_node;
+	if (ioctl(fd, SIOCSIFHWADDR, &ifr) == -1) {
+		RTE_LOG(ERR, PMD, "%s: ioctl failed (SIOCSIFHWADDR) (%s)\n",
+			dev->data->name, ifr.ifr_name);
+		return -1;
+	}
+
+	/* Set the local application MAC address, needs to be different then
+	 * the host based MAC address.
+	 */
+	ifr.ifr_hwaddr.sa_data[0] = 'd';
+	ifr.ifr_hwaddr.sa_data[1] = 'n';
+	ifr.ifr_hwaddr.sa_data[2] = 'e';
+	ifr.ifr_hwaddr.sa_data[3] = 't';
+	ifr.ifr_hwaddr.sa_data[4] = dev->data->port_id;
+	ifr.ifr_hwaddr.sa_data[5] = dev->data->numa_node;
+	rte_memcpy(addr, ifr.ifr_hwaddr.sa_data, ETH_ALEN);
+
+	return 0;
+}
+
+static int
+eth_dev_tap_create(const char *name, char *tap_name)
+{
+	int numa_node = rte_socket_id();
+	struct rte_eth_dev *dev = NULL;
+	struct pmd_internals *pmd = NULL;
+	struct rte_eth_dev_data *data = NULL;
+	int i, fd = -1;
+
+	RTE_LOG(INFO, PMD,
+		"%s: Create TAP Ethernet device with %d queues on numa %u\n",
+		 name, RTE_PMD_TAP_MAX_QUEUES, rte_socket_id());
+
+	data = rte_zmalloc_socket(tap_name, sizeof(*data), 0, numa_node);
+	if (!data) {
+		RTE_LOG(INFO, PMD, "Failed to allocate data\n");
+		goto error_exit;
+	}
+
+	pmd = rte_zmalloc_socket(tap_name, sizeof(*pmd), 0, numa_node);
+	if (!pmd) {
+		RTE_LOG(INFO, PMD, "Unable to allocate internal struct\n");
+		goto error_exit;
+	}
+
+	/* Use the name and not the tap_name */
+	dev = rte_eth_dev_allocate(tap_name);
+	if (!dev) {
+		RTE_LOG(INFO, PMD, "Unable to allocate device struct\n");
+		goto error_exit;
+	}
+
+	snprintf(pmd->name, sizeof(pmd->name), "%s", tap_name);
+
+	pmd->nb_queues = RTE_PMD_TAP_MAX_QUEUES;
+
+	/* Setup some default values */
+	data->dev_private = pmd;
+	data->port_id = dev->data->port_id;
+	data->dev_flags = RTE_ETH_DEV_DETACHABLE;
+	data->kdrv = RTE_KDRV_NONE;
+	data->drv_name = pmd_tap_drv.driver.name;
+	data->numa_node = numa_node;
+
+	data->dev_link = pmd_link;
+	data->mac_addrs = &pmd->eth_addr;
+	data->nb_rx_queues = pmd->nb_queues;
+	data->nb_tx_queues = pmd->nb_queues;
+
+	dev->data = data;
+	dev->dev_ops = &ops;
+	dev->driver = NULL;
+	dev->rx_pkt_burst = pmd_rx_burst;
+	dev->tx_pkt_burst = pmd_tx_burst;
+	snprintf(dev->data->name, sizeof(dev->data->name), "%s", name);
+
+	/* Create the first Tap device */
+	fd = tun_alloc(tap_name);
+	if (fd < 0) {
+		RTE_LOG(INFO, PMD, "tun_alloc() failed\n");
+		goto error_exit;
+	}
+
+	/* Presetup the fds to -1 as being not working */
+	for (i = 0; i < RTE_PMD_TAP_MAX_QUEUES; i++) {
+		pmd->fds[i] = -1;
+		pmd->rxq[i].fd = -1;
+		pmd->txq[i].fd = -1;
+	}
+
+	/* Take the TUN/TAP fd and place in the first location */
+	pmd->rxq[0].fd = fd;
+	pmd->txq[0].fd = fd;
+	pmd->fds[0] = fd;
+
+	if (pmd_mac_address(fd, dev, &pmd->eth_addr) < 0) {
+		RTE_LOG(INFO, PMD, "Unable to get MAC address\n");
+		goto error_exit;
+	}
+
+	return 0;
+
+error_exit:
+	RTE_PMD_DEBUG_TRACE("Unable to initialize %s\n", name);
+
+	rte_free(data);
+	rte_free(pmd);
+
+	rte_eth_dev_release_port(dev);
+
+	return -EINVAL;
+}
+
+static int
+set_interface_name(const char *key __rte_unused,
+		   const char *value,
+		   void *extra_args)
+{
+	char *name = (char *)extra_args;
+
+	if (value)
+		snprintf(name, RTE_ETH_NAME_MAX_LEN - 1, "%s", value);
+	else
+		snprintf(name, RTE_ETH_NAME_MAX_LEN - 1, "%s%d",
+			 DEFAULT_TAP_NAME, (tap_unit - 1));
+
+	return 0;
+}
+
+static int
+set_interface_speed(const char *key __rte_unused,
+		    const char *value,
+		    void *extra_args)
+{
+	*(int *)extra_args = (value) ? atoi(value) : ETH_SPEED_NUM_10G;
+
+	return 0;
+}
+
+/* Open a TAP interface device.
+ */
+static int
+rte_pmd_tap_probe(const char *name, const char *params)
+{
+	int ret;
+	struct rte_kvargs *kvlist = NULL;
+	int speed;
+	char tap_name[RTE_ETH_NAME_MAX_LEN];
+
+	speed = ETH_SPEED_NUM_10G;
+	snprintf(tap_name, sizeof(tap_name), "%s%d",
+		 DEFAULT_TAP_NAME, tap_unit++);
+
+	RTE_LOG(INFO, PMD, "Initializing pmd_tap for %s as %s\n",
+		name, tap_name);
+
+	if (params && (params[0] != '\0')) {
+		RTE_LOG(INFO, PMD, "paramaters (%s)\n", params);
+
+		kvlist = rte_kvargs_parse(params, valid_arguments);
+		if (kvlist) {
+			if (rte_kvargs_count(kvlist, ETH_TAP_SPEED_ARG) == 1) {
+				ret = rte_kvargs_process(kvlist,
+							 ETH_TAP_SPEED_ARG,
+							 &set_interface_speed,
+							 &speed);
+				if (ret == -1)
+					goto leave;
+			}
+
+			if (rte_kvargs_count(kvlist, ETH_TAP_IFACE_ARG) == 1) {
+				ret = rte_kvargs_process(kvlist,
+							 ETH_TAP_IFACE_ARG,
+							 &set_interface_name,
+							 tap_name);
+				if (ret == -1)
+					goto leave;
+			}
+		}
+	}
+	pmd_link.link_speed = speed;
+
+	ret = eth_dev_tap_create(name, tap_name);
+
+leave:
+	if (ret == -1) {
+		RTE_LOG(INFO, PMD, "Failed to create pmd for %s as %s\n",
+			name, tap_name);
+		tap_unit--;		/* Restore the unit number */
+	}
+	rte_kvargs_free(kvlist);
+
+	return ret;
+}
+
+/* detach a TAP device.
+ */
+static int
+rte_pmd_tap_remove(const char *name)
+{
+	struct rte_eth_dev *eth_dev = NULL;
+	struct pmd_internals *internals;
+	int i;
+
+	RTE_LOG(INFO, PMD, "Closing TUN/TAP Ethernet device on numa %u\n",
+		rte_socket_id());
+
+	/* find the ethdev entry */
+	eth_dev = rte_eth_dev_allocated(name);
+	if (!eth_dev)
+		return 0;
+
+	internals = eth_dev->data->dev_private;
+	for (i = 0; i < internals->nb_queues; i++)
+		if (internals->fds[i] != -1)
+			close(internals->fds[i]);
+
+	rte_free(eth_dev->data->dev_private);
+	rte_free(eth_dev->data);
+
+	rte_eth_dev_release_port(eth_dev);
+
+	return 0;
+}
+
+static struct rte_vdev_driver pmd_tap_drv = {
+	.probe = rte_pmd_tap_probe,
+	.remove = rte_pmd_tap_remove,
+};
+RTE_PMD_REGISTER_VDEV(net_tap, pmd_tap_drv);
+RTE_PMD_REGISTER_ALIAS(net_tap, eth_tap);
+RTE_PMD_REGISTER_PARAM_STRING(net_tap, "iface=<string>,speed=N");
diff --git a/drivers/net/tap/rte_pmd_tap_version.map b/drivers/net/tap/rte_pmd_tap_version.map
new file mode 100644
index 0000000..31eca32
--- /dev/null
+++ b/drivers/net/tap/rte_pmd_tap_version.map
@@ -0,0 +1,4 @@
+DPDK_17.02 {
+
+	local: *;
+};
diff --git a/mk/rte.app.mk b/mk/rte.app.mk
index f75f0e2..02c32ae 100644
--- a/mk/rte.app.mk
+++ b/mk/rte.app.mk
@@ -124,6 +124,7 @@ _LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_PCAP)       += -lrte_pmd_pcap -lpcap
 _LDLIBS-$(CONFIG_RTE_LIBRTE_QEDE_PMD)       += -lrte_pmd_qede
 _LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_RING)       += -lrte_pmd_ring
 _LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_SZEDATA2)   += -lrte_pmd_szedata2 -lsze2
+_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_TAP)        += -lrte_pmd_tap
 _LDLIBS-$(CONFIG_RTE_LIBRTE_THUNDERX_NICVF_PMD) += -lrte_pmd_thunderx_nicvf -lm
 _LDLIBS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD)     += -lrte_pmd_virtio
 ifeq ($(CONFIG_RTE_LIBRTE_VHOST),y)
-- 
2.8.0.GIT

^ permalink raw reply related	[flat|nested] 59+ messages in thread

* Re: [PATCH v12] net/tap: new TUN/TAP device PMD
  2016-12-12 14:38       ` Keith Wiles
@ 2016-12-12 19:13         ` Marc
  2016-12-12 21:09           ` Wiles, Keith
  2016-12-13 13:54         ` Ferruh Yigit
  1 sibling, 1 reply; 59+ messages in thread
From: Marc @ 2016-12-12 19:13 UTC (permalink / raw)
  To: Keith Wiles; +Cc: dev

Keith,

A bit late, but two very high level questions. Do you have performance
numbers compared to KNI? Did you consider using AF_PACKET PACKET_MMAP which
could potentially reduce the number of syscalls to 1 for RX and TX of a
burst?

Marc

On 12 December 2016 at 15:38, Keith Wiles <keith.wiles@intel.com> wrote:

> The PMD allows for DPDK and the host to communicate using a raw
> device interface on the host and in the DPDK application. The device
> created is a Tap device with a L2 packet header.
>
> v12- Fixup minor changes for driver_name and version number
> v11- Add the tap.rst to the nic/index.rst file
> v10- Change the string name used to allow for multiple devices.
> v9 - Fix up the docs to use correct syntax
> v8 - Fix issue with tap_tx_queue_setup() not return zero on success.
> v7 - Reword the comment in common_base and fix the data->name issue
> v6 - fixed the checkpatch issues
> v5 - merge in changes from list review see related emails
>      fixed many minor edits
> v4 - merge with latest driver changes
> v3 - fix includes by removing ifdef for other type besides Linux
>      Fix the copyright notice in the Makefile
> v2 - merge all of the patches into one patch
>      Fix a typo on naming the tap device
>      Update the maintainers list
>
> Signed-off-by: Keith Wiles <keith.wiles@intel.com>
> ---
>  MAINTAINERS                             |   5 +
>  config/common_base                      |   9 +
>  config/common_linuxapp                  |   1 +
>  doc/guides/nics/index.rst               |   1 +
>  doc/guides/nics/tap.rst                 | 136 ++++++
>  drivers/net/Makefile                    |   1 +
>  drivers/net/tap/Makefile                |  57 +++
>  drivers/net/tap/rte_eth_tap.c           | 765
> ++++++++++++++++++++++++++++++++
>  drivers/net/tap/rte_pmd_tap_version.map |   4 +
>  mk/rte.app.mk                           |   1 +
>  10 files changed, 980 insertions(+)
>  create mode 100644 doc/guides/nics/tap.rst
>  create mode 100644 drivers/net/tap/Makefile
>  create mode 100644 drivers/net/tap/rte_eth_tap.c
>  create mode 100644 drivers/net/tap/rte_pmd_tap_version.map
>
> diff --git a/MAINTAINERS b/MAINTAINERS
> index 26d9590..842fb6d 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -398,6 +398,11 @@ F: doc/guides/nics/pcap_ring.rst
>  F: app/test/test_pmd_ring.c
>  F: app/test/test_pmd_ring_perf.c
>
> +Tap PMD
> +M: Keith Wiles <keith.wiles@intel.com>
> +F: drivers/net/tap
> +F: doc/guides/nics/tap.rst
> +
>  Null Networking PMD
>  M: Tetsuya Mukawa <mtetsuyah@gmail.com>
>  F: drivers/net/null/
> diff --git a/config/common_base b/config/common_base
> index 652a839..eb51cdb 100644
> --- a/config/common_base
> +++ b/config/common_base
> @@ -590,3 +590,12 @@ CONFIG_RTE_APP_TEST_RESOURCE_TAR=n
>  CONFIG_RTE_TEST_PMD=y
>  CONFIG_RTE_TEST_PMD_RECORD_CORE_CYCLES=n
>  CONFIG_RTE_TEST_PMD_RECORD_BURST_STATS=n
> +
> +#
> +# Compile the TAP PMD
> +#
> +# The TAP PMD is currently only built for Linux and the
> +# option is enabled by default in common_linuxapp file,
> +# set to 'n' in the common_base file.
> +#
> +CONFIG_RTE_LIBRTE_PMD_TAP=n
> diff --git a/config/common_linuxapp b/config/common_linuxapp
> index 2483dfa..782b503 100644
> --- a/config/common_linuxapp
> +++ b/config/common_linuxapp
> @@ -44,3 +44,4 @@ CONFIG_RTE_LIBRTE_PMD_VHOST=y
>  CONFIG_RTE_LIBRTE_PMD_AF_PACKET=y
>  CONFIG_RTE_LIBRTE_POWER=y
>  CONFIG_RTE_VIRTIO_USER=y
> +CONFIG_RTE_LIBRTE_PMD_TAP=y
> diff --git a/doc/guides/nics/index.rst b/doc/guides/nics/index.rst
> index 92d56a5..af92529 100644
> --- a/doc/guides/nics/index.rst
> +++ b/doc/guides/nics/index.rst
> @@ -51,6 +51,7 @@ Network Interface Controller Drivers
>      nfp
>      qede
>      szedata2
> +    tap
>      thunderx
>      virtio
>      vhost
> diff --git a/doc/guides/nics/tap.rst b/doc/guides/nics/tap.rst
> new file mode 100644
> index 0000000..622b9e7
> --- /dev/null
> +++ b/doc/guides/nics/tap.rst
> @@ -0,0 +1,136 @@
> +..  BSD LICENSE
> +    Copyright(c) 2016 Intel Corporation. All rights reserved.
> +    All rights reserved.
> +
> +    Redistribution and use in source and binary forms, with or without
> +    modification, are permitted provided that the following conditions
> +    are met:
> +
> +    * Redistributions of source code must retain the above copyright
> +    notice, this list of conditions and the following disclaimer.
> +    * Redistributions in binary form must reproduce the above copyright
> +    notice, this list of conditions and the following disclaimer in
> +    the documentation and/or other materials provided with the
> +    distribution.
> +    * Neither the name of Intel Corporation nor the names of its
> +    contributors may be used to endorse or promote products derived
> +    from this software without specific prior written permission.
> +
> +    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> +    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> +    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> +    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> +    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> +    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> +    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> +    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> +    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> +    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> +    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> +
> +Tun/Tap Poll Mode Driver
> +========================
> +
> +The ``rte_eth_tap.c`` PMD creates a device using TUN/TAP interfaces on the
> +local host. The PMD allows for DPDK and the host to communicate using a
> raw
> +device interface on the host and in the DPDK application.
> +
> +The device created is a TAP device, which sends/receives packets in a raw
> +format with a L2 header. The usage for a TAP PMD is for connectivity to the
> +local host using a TAP interface. When the TAP PMD is initialized it will
> +create a number of tap devices in the host accessed via ``ifconfig -a`` or
> +``ip`` command. The commands can be used to assign and query the virtual like
> +device.
> +
> +These TAP interfaces can be used with Wireshark or tcpdump or Pktgen-DPDK
> +along with being able to be used as a network connection to the DPDK
> +application. The method to enable one or more interfaces is to use the
> +``--vdev=net_tap`` option on the DPDK application command line. Each
> +``--vdev=net_tap`` option given will create an interface named dtap0, dtap1,
> +and so on.
> +
> +The interface name can be changed by adding the ``iface=foo0``, for example::
> +
> +   --vdev=net_tap,iface=foo0 --vdev=net_tap,iface=foo1, ...
> +
> +Also the speed of the interface can be changed from 10G to whatever number
> +needed, but the interface does not enforce that speed, for example::
> +
> +   --vdev=net_tap,iface=foo0,speed=25000
> +
> +After the DPDK application is started you can send and receive packets on
> the
> +interface using the standard rx_burst/tx_burst APIs in DPDK. From the host
> +point of view you can use any host tool like tcpdump, Wireshark, ping,
> Pktgen
> +and others to communicate with the DPDK application. The DPDK application
> may
> +not understand network protocols like IPv4/6, UDP or TCP unless the
> +application has been written to understand these protocols.
> +
> +If you need the interface to act as a real network interface, meaning it is
> +running and has a valid IP address, then you can do this with the following
> +commands::
> +
> +   sudo ip link set dtap0 up; sudo ip addr add 192.168.0.250/24 dev dtap0
> +   sudo ip link set dtap1 up; sudo ip addr add 192.168.1.250/24 dev dtap1
> +
> +Please change the IP addresses as you see fit.
> +
> +If routing is enabled on the host you can also communicate with the DPDK App
> +over the internet via a standard socket layer application as long as you
> +account for the protocol handling in the application.
> +
> +If you have a Network Stack in your DPDK application or something like it
> you
> +can utilize that stack to handle the network protocols. Plus you would be
> able
> +to address the interface using an IP address assigned to the internal
> +interface.
> +
> +Example
> +-------
> +
> +The following is a simple example of using the TUN/TAP PMD with the Pktgen
> +packet generator. It requires that the ``socat`` utility is installed on
> the
> +test system.
> +
> +Build DPDK, then pull down Pktgen and build pktgen using the DPDK
> SDK/Target
> +used to build the dpdk you pulled down.
> +
> +Run pktgen from the pktgen directory in a terminal with a commandline
> like the
> +following::
> +
> +    sudo ./app/app/x86_64-native-linuxapp-gcc/app/pktgen -l 1-5 -n 4        \
> +     --proc-type auto --log-level 8 --socket-mem 512,512 --file-prefix pg   \
> +     --vdev=net_tap --vdev=net_tap -b 05:00.0 -b 05:00.1                  \
> +     -b 04:00.0 -b 04:00.1 -b 04:00.2 -b 04:00.3                          \
> +     -b 81:00.0 -b 81:00.1 -b 81:00.2 -b 81:00.3                          \
> +     -b 82:00.0 -b 83:00.0 -- -T -P -m [2:3].0 -m [4:5].1                 \
> +     -f themes/black-yellow.theme
> +
> +.. Note::
> +
> +   Change the ``-b`` options to blacklist all of your physical ports. The
> +   following command line is all one line.
> +
> +   Also, ``-f themes/black-yellow.theme`` is optional if the default
> colors
> +   work on your system configuration. See the Pktgen docs for more
> +   information.
> +
> +Verify with the ``ifconfig -a`` command in a different xterm window; you
> +should see that the ``dtap0`` and ``dtap1`` interfaces were created.
> +
> +Next set the links for the two interfaces to up via the commands below::
> +
> +    sudo ip link set dtap0 up; sudo ip addr add 192.168.0.250/24 dev dtap0
> +    sudo ip link set dtap1 up; sudo ip addr add 192.168.1.250/24 dev dtap1
> +
> +Then use socat to create a loopback for the two interfaces::
> +
> +    sudo socat interface:dtap0 interface:dtap1
> +
> +Then on the Pktgen command line interface you can start sending packets
> using
> +the commands ``start 0`` and ``start 1`` or you can start both at the same
> +time with ``start all``. The command ``str`` is an alias for ``start
> all`` and
> +``stp`` is an alias for ``stop all``.
> +
> +While running you should see the 64 byte counters increasing to verify the
> +traffic is being looped back. You can use ``set all size XXX`` to change
> the
> +size of the packets after you stop the traffic. Use the pktgen ``help``
> +command to see a list of all commands. You can also use the ``-f`` option
> to
> +load commands at startup.
> diff --git a/drivers/net/Makefile b/drivers/net/Makefile
> index bc93230..e366a85 100644
> --- a/drivers/net/Makefile
> +++ b/drivers/net/Makefile
> @@ -51,6 +51,7 @@ DIRS-$(CONFIG_RTE_LIBRTE_PMD_PCAP) += pcap
>  DIRS-$(CONFIG_RTE_LIBRTE_QEDE_PMD) += qede
>  DIRS-$(CONFIG_RTE_LIBRTE_PMD_RING) += ring
>  DIRS-$(CONFIG_RTE_LIBRTE_PMD_SZEDATA2) += szedata2
> +DIRS-$(CONFIG_RTE_LIBRTE_PMD_TAP) += tap
>  DIRS-$(CONFIG_RTE_LIBRTE_THUNDERX_NICVF_PMD) += thunderx
>  DIRS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio
>  DIRS-$(CONFIG_RTE_LIBRTE_VMXNET3_PMD) += vmxnet3
> diff --git a/drivers/net/tap/Makefile b/drivers/net/tap/Makefile
> new file mode 100644
> index 0000000..e18f30c
> --- /dev/null
> +++ b/drivers/net/tap/Makefile
> @@ -0,0 +1,57 @@
> +#   BSD LICENSE
> +#
> +#   Copyright(c) 2016 Intel Corporation. All rights reserved.
> +#
> +#   Redistribution and use in source and binary forms, with or without
> +#   modification, are permitted provided that the following conditions
> +#   are met:
> +#
> +#     * Redistributions of source code must retain the above copyright
> +#       notice, this list of conditions and the following disclaimer.
> +#     * Redistributions in binary form must reproduce the above copyright
> +#       notice, this list of conditions and the following disclaimer in
> +#       the documentation and/or other materials provided with the
> +#       distribution.
> +#     * Neither the name of Intel Corporation nor the names of its
> +#       contributors may be used to endorse or promote products derived
> +#       from this software without specific prior written permission.
> +#
> +#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> +#   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> +#   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> +#   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> +#   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> +#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> +#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> +#   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> +#   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> +#   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> +#   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> +
> +include $(RTE_SDK)/mk/rte.vars.mk
> +
> +#
> +# library name
> +#
> +LIB = librte_pmd_tap.a
> +
> +EXPORT_MAP := rte_pmd_tap_version.map
> +
> +LIBABIVER := 1
> +
> +CFLAGS += -O3
> +CFLAGS += $(WERROR_FLAGS)
> +
> +#
> +# all source are stored in SRCS-y
> +#
> +SRCS-$(CONFIG_RTE_LIBRTE_PMD_TAP) += rte_eth_tap.c
> +
> +# this lib depends upon:
> +DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_TAP) += lib/librte_eal
> +DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_TAP) += lib/librte_mbuf
> +DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_TAP) += lib/librte_mempool
> +DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_TAP) += lib/librte_ether
> +DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_TAP) += lib/librte_kvargs
> +
> +include $(RTE_SDK)/mk/rte.lib.mk
> diff --git a/drivers/net/tap/rte_eth_tap.c b/drivers/net/tap/rte_eth_tap.c
> new file mode 100644
> index 0000000..976f2d9
> --- /dev/null
> +++ b/drivers/net/tap/rte_eth_tap.c
> @@ -0,0 +1,765 @@
> +/*-
> + *   BSD LICENSE
> + *
> + *   Copyright(c) 2016 Intel Corporation. All rights reserved.
> + *   All rights reserved.
> + *
> + *   Redistribution and use in source and binary forms, with or without
> + *   modification, are permitted provided that the following conditions
> + *   are met:
> + *
> + *     * Redistributions of source code must retain the above copyright
> + *       notice, this list of conditions and the following disclaimer.
> + *     * Redistributions in binary form must reproduce the above copyright
> + *       notice, this list of conditions and the following disclaimer in
> + *       the documentation and/or other materials provided with the
> + *       distribution.
> + *     * Neither the name of Intel Corporation nor the names of its
> + *       contributors may be used to endorse or promote products derived
> + *       from this software without specific prior written permission.
> + *
> + *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> + *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> + *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> + *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> + *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> + *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> + *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> + *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> + *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> + *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> + *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> + */
> +
> +#include <rte_mbuf.h>
> +#include <rte_ethdev.h>
> +#include <rte_malloc.h>
> +#include <rte_vdev.h>
> +#include <rte_kvargs.h>
> +
> +#include <sys/types.h>
> +#include <sys/stat.h>
> +#include <sys/socket.h>
> +#include <sys/ioctl.h>
> +#include <sys/mman.h>
> +#include <unistd.h>
> +#include <poll.h>
> +#include <arpa/inet.h>
> +#include <linux/if.h>
> +#include <linux/if_tun.h>
> +#include <linux/if_ether.h>
> +#include <fcntl.h>
> +
> +/* Linux based path to the TUN device */
> +#define TUN_TAP_DEV_PATH        "/dev/net/tun"
> +#define DEFAULT_TAP_NAME        "dtap"
> +
> +#define ETH_TAP_IFACE_ARG       "iface"
> +#define ETH_TAP_SPEED_ARG       "speed"
> +
> +#define RTE_PMD_TAP_MAX_QUEUES 16
> +
> +static struct rte_vdev_driver pmd_tap_drv;
> +
> +static const char *valid_arguments[] = {
> +       ETH_TAP_IFACE_ARG,
> +       ETH_TAP_SPEED_ARG,
> +       NULL
> +};
> +
> +static int tap_unit;
> +
> +static struct rte_eth_link pmd_link = {
> +       .link_speed = ETH_SPEED_NUM_10G,
> +       .link_duplex = ETH_LINK_FULL_DUPLEX,
> +       .link_status = ETH_LINK_DOWN,
> +       .link_autoneg = ETH_LINK_SPEED_AUTONEG
> +};
> +
> +struct pkt_stats {
> +       uint64_t opackets;              /* Number of output packets */
> +       uint64_t ipackets;              /* Number of input packets */
> +       uint64_t obytes;                /* Number of bytes on output */
> +       uint64_t ibytes;                /* Number of bytes on input */
> +       uint64_t errs;                  /* Number of error packets */
> +};
> +
> +struct rx_queue {
> +       struct rte_mempool *mp;         /* Mempool for RX packets */
> +       uint16_t in_port;               /* Port ID */
> +       int fd;
> +
> +       struct pkt_stats stats;         /* Stats for this RX queue */
> +};
> +
> +struct tx_queue {
> +       int fd;
> +       struct pkt_stats stats;         /* Stats for this TX queue */
> +};
> +
> +struct pmd_internals {
> +       char name[RTE_ETH_NAME_MAX_LEN];        /* Internal Tap device
> name */
> +       uint16_t nb_queues;             /* Number of queues supported */
> +       struct ether_addr eth_addr;     /* Mac address of the device port
> */
> +
> +       int if_index;                   /* IF_INDEX for the port */
> +       int fds[RTE_PMD_TAP_MAX_QUEUES]; /* List of all file descriptors */
> +
> +       struct rx_queue rxq[RTE_PMD_TAP_MAX_QUEUES];    /* List of RX
> queues */
> +       struct tx_queue txq[RTE_PMD_TAP_MAX_QUEUES];    /* List of TX
> queues */
> +};
> +
> +/* Tun/Tap allocation routine
> + *
> + * name is the name of the interface to use, unless NULL or empty to take the
> + * host supplied name.
> + */
> +static int
> +tun_alloc(char *name)
> +{
> +       struct ifreq ifr;
> +       unsigned int features;
> +       int fd;
> +
> +       memset(&ifr, 0, sizeof(struct ifreq));
> +
> +       ifr.ifr_flags = IFF_TAP | IFF_NO_PI;
> +       if (name && name[0])
> +               strncpy(ifr.ifr_name, name, IFNAMSIZ);
> +
> +       fd = open(TUN_TAP_DEV_PATH, O_RDWR);
> +       if (fd < 0) {
> +               RTE_LOG(ERR, PMD, "Unable to create TAP interface");
> +               goto error;
> +       }
> +
> +       /* Grab the TUN features to verify we can work */
> +       if (ioctl(fd, TUNGETFEATURES, &features) < 0) {
> +               RTE_LOG(ERR, PMD, "Unable to get TUN/TAP features\n");
> +               goto error;
> +       }
> +       RTE_LOG(DEBUG, PMD, "TUN/TAP Features %08x\n", features);
> +
> +       if (!(features & IFF_MULTI_QUEUE) && (RTE_PMD_TAP_MAX_QUEUES > 1))
> {
> +               RTE_LOG(DEBUG, PMD, "TUN/TAP device only one queue\n");
> +               goto error;
> +       } else if ((features & IFF_ONE_QUEUE) &&
> +                       (RTE_PMD_TAP_MAX_QUEUES == 1)) {
> +               ifr.ifr_flags |= IFF_ONE_QUEUE;
> +               RTE_LOG(DEBUG, PMD, "Single queue only support\n");
> +       } else {
> +               ifr.ifr_flags |= IFF_MULTI_QUEUE;
> +               RTE_LOG(DEBUG, PMD, "Multi-queue support for %d queues\n",
> +                       RTE_PMD_TAP_MAX_QUEUES);
> +       }
> +
> +       /* Set the TUN/TAP configuration and get the name if needed */
> +       if (ioctl(fd, TUNSETIFF, (void *)&ifr) < 0) {
> +               RTE_LOG(ERR, PMD, "Unable to set TUNSETIFF for %s\n",
> +                       ifr.ifr_name);
> +               perror("TUNSETIFF");
> +               goto error;
> +       }
> +
> +       /* Always set the file descriptor to non-blocking */
> +       if (fcntl(fd, F_SETFL, O_NONBLOCK) < 0) {
> +               RTE_LOG(ERR, PMD, "Unable to set to nonblocking\n");
> +               perror("F_SETFL, NONBLOCK");
> +               goto error;
> +       }
> +
> +       /* If the name is different that new name as default */
> +       if (name && strcmp(name, ifr.ifr_name))
> +               snprintf(name, RTE_ETH_NAME_MAX_LEN - 1, "%s",
> ifr.ifr_name);
> +
> +       return fd;
> +
> +error:
> +       if (fd > 0)
> +               close(fd);
> +       return -1;
> +}
> +
> +/* Callback to handle the rx burst of packets to the correct interface and
> + * file descriptor(s) in a multi-queue setup.
> + */
> +static uint16_t
> +pmd_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
> +{
> +       int len;
> +       struct rte_mbuf *mbuf;
> +       struct rx_queue *rxq = queue;
> +       uint16_t num_rx;
> +       unsigned long num_rx_bytes = 0;
> +
> +       for (num_rx = 0; num_rx < nb_pkts; ) {
> +               /* allocate the next mbuf */
> +               mbuf = rte_pktmbuf_alloc(rxq->mp);
> +               if (unlikely(!mbuf)) {
> +                       RTE_LOG(WARNING, PMD, "Unable to allocate mbuf\n");
> +                       break;
> +               }
> +
> +               len = read(rxq->fd, rte_pktmbuf_mtod(mbuf, char *),
> +                          rte_pktmbuf_tailroom(mbuf));
> +               if (len <= 0) {
> +                       rte_pktmbuf_free(mbuf);
> +                       break;
> +               }
> +
> +               mbuf->data_len = len;
> +               mbuf->pkt_len = len;
> +               mbuf->port = rxq->in_port;
> +
> +               /* account for the receive frame */
> +               bufs[num_rx++] = mbuf;
> +               num_rx_bytes += mbuf->pkt_len;
> +       }
> +       rxq->stats.ipackets += num_rx;
> +       rxq->stats.ibytes += num_rx_bytes;
> +
> +       return num_rx;
> +}
> +
> +/* Callback to handle sending packets from the tap interface
> + */
> +static uint16_t
> +pmd_tx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
> +{
> +       struct rte_mbuf *mbuf;
> +       struct tx_queue *txq = queue;
> +       struct pollfd pfd;
> +       uint16_t num_tx = 0;
> +       unsigned long num_tx_bytes = 0;
> +       int i, n;
> +
> +       if (unlikely(nb_pkts == 0))
> +               return 0;
> +
> +       pfd.events = POLLOUT;
> +       pfd.fd = txq->fd;
> +       for (i = 0; i < nb_pkts; i++) {
> +               n = poll(&pfd, 1, 0);
> +
> +               if (n <= 0)
> +                       break;
> +
> +               if (pfd.revents & POLLOUT) {
> +                       /* copy the tx frame data */
> +                       mbuf = bufs[num_tx];
> +                       n = write(pfd.fd, rte_pktmbuf_mtod(mbuf, void*),
> +                                 rte_pktmbuf_pkt_len(mbuf));
> +                       if (n <= 0)
> +                               break;
> +
> +                       num_tx++;
> +                       num_tx_bytes += mbuf->pkt_len;
> +                       rte_pktmbuf_free(mbuf);
> +               }
> +       }
> +
> +       txq->stats.opackets += num_tx;
> +       txq->stats.errs += nb_pkts - num_tx;
> +       txq->stats.obytes += num_tx_bytes;
> +
> +       return num_tx;
> +}
> +
> +static int
> +tap_dev_start(struct rte_eth_dev *dev)
> +{
> +       /* Force the Link up */
> +       dev->data->dev_link.link_status = ETH_LINK_UP;
> +
> +       return 0;
> +}
> +
> +/* This function gets called when the current port gets stopped.
> + */
> +static void
> +tap_dev_stop(struct rte_eth_dev *dev)
> +{
> +       int i;
> +       struct pmd_internals *internals = dev->data->dev_private;
> +
> +       for (i = 0; i < internals->nb_queues; i++)
> +               if (internals->fds[i] != -1)
> +                       close(internals->fds[i]);
> +
> +       dev->data->dev_link.link_status = ETH_LINK_DOWN;
> +}
> +
> +static int
> +tap_dev_configure(struct rte_eth_dev *dev __rte_unused)
> +{
> +       return 0;
> +}
> +
> +static void
> +tap_dev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
> +{
> +       struct pmd_internals *internals = dev->data->dev_private;
> +
> +       dev_info->if_index = internals->if_index;
> +       dev_info->max_mac_addrs = 1;
> +       dev_info->max_rx_pktlen = (uint32_t)ETHER_MAX_VLAN_FRAME_LEN;
> +       dev_info->max_rx_queues = internals->nb_queues;
> +       dev_info->max_tx_queues = internals->nb_queues;
> +       dev_info->min_rx_bufsize = 0;
> +       dev_info->pci_dev = NULL;
> +}
> +
> +static void
> +tap_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *tap_stats)
> +{
> +       unsigned int i, imax;
> +       unsigned long rx_total = 0, tx_total = 0, tx_err_total = 0;
> +       unsigned long rx_bytes_total = 0, tx_bytes_total = 0;
> +       const struct pmd_internals *pmd = dev->data->dev_private;
> +
> +       imax = (pmd->nb_queues < RTE_ETHDEV_QUEUE_STAT_CNTRS) ?
> +               pmd->nb_queues : RTE_ETHDEV_QUEUE_STAT_CNTRS;
> +
> +       for (i = 0; i < imax; i++) {
> +               tap_stats->q_ipackets[i] = pmd->rxq[i].stats.ipackets;
> +               tap_stats->q_ibytes[i] = pmd->rxq[i].stats.ibytes;
> +               rx_total += tap_stats->q_ipackets[i];
> +               rx_bytes_total += tap_stats->q_ibytes[i];
> +       }
> +
> +       for (i = 0; i < imax; i++) {
> +               tap_stats->q_opackets[i] = pmd->txq[i].stats.opackets;
> +               tap_stats->q_errors[i] = pmd->txq[i].stats.errs;
> +               tap_stats->q_obytes[i] = pmd->txq[i].stats.obytes;
> +               tx_total += tap_stats->q_opackets[i];
> +               tx_err_total += tap_stats->q_errors[i];
> +               tx_bytes_total += tap_stats->q_obytes[i];
> +       }
> +
> +       tap_stats->ipackets = rx_total;
> +       tap_stats->ibytes = rx_bytes_total;
> +       tap_stats->opackets = tx_total;
> +       tap_stats->oerrors = tx_err_total;
> +       tap_stats->obytes = tx_bytes_total;
> +}
> +
> +static void
> +tap_stats_reset(struct rte_eth_dev *dev)
> +{
> +       int i;
> +       struct pmd_internals *pmd = dev->data->dev_private;
> +
> +       for (i = 0; i < pmd->nb_queues; i++) {
> +               pmd->rxq[i].stats.ipackets = 0;
> +               pmd->rxq[i].stats.ibytes = 0;
> +       }
> +
> +       for (i = 0; i < pmd->nb_queues; i++) {
> +               pmd->txq[i].stats.opackets = 0;
> +               pmd->txq[i].stats.errs = 0;
> +               pmd->txq[i].stats.obytes = 0;
> +       }
> +}
> +
> +static void
> +tap_dev_close(struct rte_eth_dev *dev __rte_unused)
> +{
> +}
> +
> +static void
> +tap_rx_queue_release(void *queue)
> +{
> +       struct rx_queue *rxq = queue;
> +
> +       if (rxq && (rxq->fd > 0)) {
> +               close(rxq->fd);
> +               rxq->fd = -1;
> +       }
> +}
> +
> +static void
> +tap_tx_queue_release(void *queue)
> +{
> +       struct tx_queue *txq = queue;
> +
> +       if (txq && (txq->fd > 0)) {
> +               close(txq->fd);
> +               txq->fd = -1;
> +       }
> +}
> +
> +static int
> +tap_link_update(struct rte_eth_dev *dev __rte_unused,
> +               int wait_to_complete __rte_unused)
> +{
> +       return 0;
> +}
> +
> +static int
> +tap_setup_queue(struct rte_eth_dev *dev,
> +               struct pmd_internals *internals,
> +               uint16_t qid)
> +{
> +       struct rx_queue *rx = &internals->rxq[qid];
> +       struct tx_queue *tx = &internals->txq[qid];
> +       int fd;
> +
> +       fd = rx->fd;
> +       if (fd < 0) {
> +               fd = tx->fd;
> +               if (fd < 0) {
> +                       RTE_LOG(INFO, PMD, "Add queue to TAP %s for qid
> %d\n",
> +                               dev->data->name, qid);
> +                       fd = tun_alloc(dev->data->name);
> +                       if (fd < 0) {
> +                               RTE_LOG(ERR, PMD, "tun_alloc(%s) failed\n",
> +                                       dev->data->name);
> +                               return -1;
> +                       }
> +               }
> +       }
> +       dev->data->rx_queues[qid] = rx;
> +       dev->data->tx_queues[qid] = tx;
> +
> +       rx->fd = fd;
> +       tx->fd = fd;
> +
> +       return fd;
> +}
> +
> +static int
> +tap_rx_queue_setup(struct rte_eth_dev *dev,
> +                  uint16_t rx_queue_id,
> +                  uint16_t nb_rx_desc __rte_unused,
> +                  unsigned int socket_id __rte_unused,
> +                  const struct rte_eth_rxconf *rx_conf __rte_unused,
> +                  struct rte_mempool *mp)
> +{
> +       struct pmd_internals *internals = dev->data->dev_private;
> +       uint16_t buf_size;
> +       int fd;
> +
> +       if ((rx_queue_id >= internals->nb_queues) || !mp) {
> +               RTE_LOG(ERR, PMD, "nb_queues %d mp %p\n",
> +                       internals->nb_queues, mp);
> +               return -1;
> +       }
> +
> +       internals->rxq[rx_queue_id].mp = mp;
> +       internals->rxq[rx_queue_id].in_port = dev->data->port_id;
> +
> +       /* Now get the space available for data in the mbuf */
> +       buf_size = (uint16_t)(rte_pktmbuf_data_room_size(mp) -
> +                               RTE_PKTMBUF_HEADROOM);
> +
> +       if (buf_size < ETH_FRAME_LEN) {
> +               RTE_LOG(ERR, PMD,
> +                       "%s: %d bytes will not fit in mbuf (%d bytes)\n",
> +                       dev->data->name, ETH_FRAME_LEN, buf_size);
> +               return -ENOMEM;
> +       }
> +
> +       fd = tap_setup_queue(dev, internals, rx_queue_id);
> +       if (fd == -1)
> +               return -1;
> +
> +       internals->fds[rx_queue_id] = fd;
> +       RTE_LOG(INFO, PMD, "RX TAP device name %s, qid %d on fd %d\n",
> +               dev->data->name, rx_queue_id, internals->rxq[rx_queue_id].
> fd);
> +
> +       return 0;
> +}
> +
> +static int
> +tap_tx_queue_setup(struct rte_eth_dev *dev,
> +                  uint16_t tx_queue_id,
> +                  uint16_t nb_tx_desc __rte_unused,
> +                  unsigned int socket_id __rte_unused,
> +                  const struct rte_eth_txconf *tx_conf __rte_unused)
> +{
> +       struct pmd_internals *internals = dev->data->dev_private;
> +       int ret;
> +
> +       if (tx_queue_id >= internals->nb_queues)
> +               return -1;
> +
> +       ret = tap_setup_queue(dev, internals, tx_queue_id);
> +       if (ret == -1)
> +               return -1;
> +
> +       RTE_LOG(INFO, PMD, "TX TAP device name %s, qid %d on fd %d\n",
> +               dev->data->name, tx_queue_id, internals->txq[tx_queue_id].
> fd);
> +
> +       return 0;
> +}
> +
> +static const struct eth_dev_ops ops = {
> +       .dev_start              = tap_dev_start,
> +       .dev_stop               = tap_dev_stop,
> +       .dev_close              = tap_dev_close,
> +       .dev_configure          = tap_dev_configure,
> +       .dev_infos_get          = tap_dev_info,
> +       .rx_queue_setup         = tap_rx_queue_setup,
> +       .tx_queue_setup         = tap_tx_queue_setup,
> +       .rx_queue_release       = tap_rx_queue_release,
> +       .tx_queue_release       = tap_tx_queue_release,
> +       .link_update            = tap_link_update,
> +       .stats_get              = tap_stats_get,
> +       .stats_reset            = tap_stats_reset,
> +};
> +
> +static int
> +pmd_mac_address(int fd, struct rte_eth_dev *dev, struct ether_addr *addr)
> +{
> +       struct ifreq ifr;
> +
> +       if ((fd <= 0) || !dev || !addr)
> +               return -1;
> +
> +       memset(&ifr, 0, sizeof(ifr));
> +
> +       if (ioctl(fd, SIOCGIFHWADDR, &ifr) == -1) {
> +               RTE_LOG(ERR, PMD, "ioctl failed (SIOCGIFHWADDR) (%s)\n",
> +                       ifr.ifr_name);
> +               return -1;
> +       }
> +
> +       /* Set the host based MAC address to this special MAC format */
> +       ifr.ifr_hwaddr.sa_data[0] = 'T';
> +       ifr.ifr_hwaddr.sa_data[1] = 'a';
> +       ifr.ifr_hwaddr.sa_data[2] = 'p';
> +       ifr.ifr_hwaddr.sa_data[3] = '-';
> +       ifr.ifr_hwaddr.sa_data[4] = dev->data->port_id;
> +       ifr.ifr_hwaddr.sa_data[5] = dev->data->numa_node;
> +       if (ioctl(fd, SIOCSIFHWADDR, &ifr) == -1) {
> +               RTE_LOG(ERR, PMD, "%s: ioctl failed (SIOCSIFHWADDR)
> (%s)\n",
> +                       dev->data->name, ifr.ifr_name);
> +               return -1;
> +       }
> +
> +       /* Set the local application MAC address, needs to be different
> then
> +        * the host based MAC address.
> +        */
> +       ifr.ifr_hwaddr.sa_data[0] = 'd';
> +       ifr.ifr_hwaddr.sa_data[1] = 'n';
> +       ifr.ifr_hwaddr.sa_data[2] = 'e';
> +       ifr.ifr_hwaddr.sa_data[3] = 't';
> +       ifr.ifr_hwaddr.sa_data[4] = dev->data->port_id;
> +       ifr.ifr_hwaddr.sa_data[5] = dev->data->numa_node;
> +       rte_memcpy(addr, ifr.ifr_hwaddr.sa_data, ETH_ALEN);
> +
> +       return 0;
> +}
> +
> +static int
> +eth_dev_tap_create(const char *name, char *tap_name)
> +{
> +       int numa_node = rte_socket_id();
> +       struct rte_eth_dev *dev = NULL;
> +       struct pmd_internals *pmd = NULL;
> +       struct rte_eth_dev_data *data = NULL;
> +       int i, fd = -1;
> +
> +       RTE_LOG(INFO, PMD,
> +               "%s: Create TAP Ethernet device with %d queues on numa
> %u\n",
> +                name, RTE_PMD_TAP_MAX_QUEUES, rte_socket_id());
> +
> +       data = rte_zmalloc_socket(tap_name, sizeof(*data), 0, numa_node);
> +       if (!data) {
> +               RTE_LOG(INFO, PMD, "Failed to allocate data\n");
> +               goto error_exit;
> +       }
> +
> +       pmd = rte_zmalloc_socket(tap_name, sizeof(*pmd), 0, numa_node);
> +       if (!pmd) {
> +               RTE_LOG(INFO, PMD, "Unable to allocate internal struct\n");
> +               goto error_exit;
> +       }
> +
> +       /* Use the name and not the tap_name */
> +       dev = rte_eth_dev_allocate(tap_name);
> +       if (!dev) {
> +               RTE_LOG(INFO, PMD, "Unable to allocate device struct\n");
> +               goto error_exit;
> +       }
> +
> +       snprintf(pmd->name, sizeof(pmd->name), "%s", tap_name);
> +
> +       pmd->nb_queues = RTE_PMD_TAP_MAX_QUEUES;
> +
> +       /* Setup some default values */
> +       data->dev_private = pmd;
> +       data->port_id = dev->data->port_id;
> +       data->dev_flags = RTE_ETH_DEV_DETACHABLE;
> +       data->kdrv = RTE_KDRV_NONE;
> +       data->drv_name = pmd_tap_drv.driver.name;
> +       data->numa_node = numa_node;
> +
> +       data->dev_link = pmd_link;
> +       data->mac_addrs = &pmd->eth_addr;
> +       data->nb_rx_queues = pmd->nb_queues;
> +       data->nb_tx_queues = pmd->nb_queues;
> +
> +       dev->data = data;
> +       dev->dev_ops = &ops;
> +       dev->driver = NULL;
> +       dev->rx_pkt_burst = pmd_rx_burst;
> +       dev->tx_pkt_burst = pmd_tx_burst;
> +       snprintf(dev->data->name, sizeof(dev->data->name), "%s", name);
> +
> +       /* Create the first Tap device */
> +       fd = tun_alloc(tap_name);
> +       if (fd < 0) {
> +               RTE_LOG(INFO, PMD, "tun_alloc() failed\n");
> +               goto error_exit;
> +       }
> +
> +       /* Presetup the fds to -1 as being not working */
> +       for (i = 0; i < RTE_PMD_TAP_MAX_QUEUES; i++) {
> +               pmd->fds[i] = -1;
> +               pmd->rxq[i].fd = -1;
> +               pmd->txq[i].fd = -1;
> +       }
> +
> +       /* Take the TUN/TAP fd and place in the first location */
> +       pmd->rxq[0].fd = fd;
> +       pmd->txq[0].fd = fd;
> +       pmd->fds[0] = fd;
> +
> +       if (pmd_mac_address(fd, dev, &pmd->eth_addr) < 0) {
> +               RTE_LOG(INFO, PMD, "Unable to get MAC address\n");
> +               goto error_exit;
> +       }
> +
> +       return 0;
> +
> +error_exit:
> +       RTE_PMD_DEBUG_TRACE("Unable to initialize %s\n", name);
> +
> +       rte_free(data);
> +       rte_free(pmd);
> +
> +       rte_eth_dev_release_port(dev);
> +
> +       return -EINVAL;
> +}
> +
> +static int
> +set_interface_name(const char *key __rte_unused,
> +                  const char *value,
> +                  void *extra_args)
> +{
> +       char *name = (char *)extra_args;
> +
> +       if (value)
> +               snprintf(name, RTE_ETH_NAME_MAX_LEN - 1, "%s", value);
> +       else
> +               snprintf(name, RTE_ETH_NAME_MAX_LEN - 1, "%s%d",
> +                        DEFAULT_TAP_NAME, (tap_unit - 1));
> +
> +       return 0;
> +}
> +
> +static int
> +set_interface_speed(const char *key __rte_unused,
> +                   const char *value,
> +                   void *extra_args)
> +{
> +       *(int *)extra_args = (value) ? atoi(value) : ETH_SPEED_NUM_10G;
> +
> +       return 0;
> +}
> +
> +/* Open a TAP interface device.
> + */
> +static int
> +rte_pmd_tap_probe(const char *name, const char *params)
> +{
> +       int ret;
> +       struct rte_kvargs *kvlist = NULL;
> +       int speed;
> +       char tap_name[RTE_ETH_NAME_MAX_LEN];
> +
> +       speed = ETH_SPEED_NUM_10G;
> +       snprintf(tap_name, sizeof(tap_name), "%s%d",
> +                DEFAULT_TAP_NAME, tap_unit++);
> +
> +       RTE_LOG(INFO, PMD, "Initializing pmd_tap for %s as %s\n",
> +               name, tap_name);
> +
> +       if (params && (params[0] != '\0')) {
> +               RTE_LOG(INFO, PMD, "paramaters (%s)\n", params);
> +
> +               kvlist = rte_kvargs_parse(params, valid_arguments);
> +               if (kvlist) {
> +                       if (rte_kvargs_count(kvlist, ETH_TAP_SPEED_ARG) ==
> 1) {
> +                               ret = rte_kvargs_process(kvlist,
> +                                                        ETH_TAP_SPEED_ARG,
> +
> &set_interface_speed,
> +                                                        &speed);
> +                               if (ret == -1)
> +                                       goto leave;
> +                       }
> +
> +                       if (rte_kvargs_count(kvlist, ETH_TAP_IFACE_ARG) ==
> 1) {
> +                               ret = rte_kvargs_process(kvlist,
> +                                                        ETH_TAP_IFACE_ARG,
> +
> &set_interface_name,
> +                                                        tap_name);
> +                               if (ret == -1)
> +                                       goto leave;
> +                       }
> +               }
> +       }
> +       pmd_link.link_speed = speed;
> +
> +       ret = eth_dev_tap_create(name, tap_name);
> +
> +leave:
> +       if (ret == -1) {
> +               RTE_LOG(INFO, PMD, "Failed to create pmd for %s as %s\n",
> +                       name, tap_name);
> +               tap_unit--;             /* Restore the unit number */
> +       }
> +       rte_kvargs_free(kvlist);
> +
> +       return ret;
> +}
> +
> +/* detach a TAP device.
> + */
> +static int
> +rte_pmd_tap_remove(const char *name)
> +{
> +       struct rte_eth_dev *eth_dev = NULL;
> +       struct pmd_internals *internals;
> +       int i;
> +
> +       RTE_LOG(INFO, PMD, "Closing TUN/TAP Ethernet device on numa %u\n",
> +               rte_socket_id());
> +
> +       /* find the ethdev entry */
> +       eth_dev = rte_eth_dev_allocated(name);
> +       if (!eth_dev)
> +               return 0;
> +
> +       internals = eth_dev->data->dev_private;
> +       for (i = 0; i < internals->nb_queues; i++)
> +               if (internals->fds[i] != -1)
> +                       close(internals->fds[i]);
> +
> +       rte_free(eth_dev->data->dev_private);
> +       rte_free(eth_dev->data);
> +
> +       rte_eth_dev_release_port(eth_dev);
> +
> +       return 0;
> +}
> +
> +static struct rte_vdev_driver pmd_tap_drv = {
> +       .probe = rte_pmd_tap_probe,
> +       .remove = rte_pmd_tap_remove,
> +};
> +RTE_PMD_REGISTER_VDEV(net_tap, pmd_tap_drv);
> +RTE_PMD_REGISTER_ALIAS(net_tap, eth_tap);
> +RTE_PMD_REGISTER_PARAM_STRING(net_tap, "iface=<string>,speed=N");
> diff --git a/drivers/net/tap/rte_pmd_tap_version.map
> b/drivers/net/tap/rte_pmd_tap_version.map
> new file mode 100644
> index 0000000..31eca32
> --- /dev/null
> +++ b/drivers/net/tap/rte_pmd_tap_version.map
> @@ -0,0 +1,4 @@
> +DPDK_17.02 {
> +
> +       local: *;
> +};
> diff --git a/mk/rte.app.mk b/mk/rte.app.mk
> index f75f0e2..02c32ae 100644
> --- a/mk/rte.app.mk
> +++ b/mk/rte.app.mk
> @@ -124,6 +124,7 @@ _LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_PCAP)       +=
> -lrte_pmd_pcap -lpcap
>  _LDLIBS-$(CONFIG_RTE_LIBRTE_QEDE_PMD)       += -lrte_pmd_qede
>  _LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_RING)       += -lrte_pmd_ring
>  _LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_SZEDATA2)   += -lrte_pmd_szedata2 -lsze2
> +_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_TAP)        += -lrte_pmd_tap
>  _LDLIBS-$(CONFIG_RTE_LIBRTE_THUNDERX_NICVF_PMD) +=
> -lrte_pmd_thunderx_nicvf -lm
>  _LDLIBS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD)     += -lrte_pmd_virtio
>  ifeq ($(CONFIG_RTE_LIBRTE_VHOST),y)
> --
> 2.8.0.GIT
>
>

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [PATCH v12] net/tap: new TUN/TAP device PMD
  2016-12-12 19:13         ` Marc
@ 2016-12-12 21:09           ` Wiles, Keith
  0 siblings, 0 replies; 59+ messages in thread
From: Wiles, Keith @ 2016-12-12 21:09 UTC (permalink / raw)
  To: Marc; +Cc: dev


> On Dec 12, 2016, at 1:13 PM, Marc <marcdevel@gmail.com> wrote:
> 
> Keith,
> 
> A bit late, but two very high level questions. Do you have performance numbers compared to KNI? Did you consider using AF_PACKET PACKET_MMAP which could potentially reduce the number of syscalls to 1 for RX and TX of a burst?

Hi Marc,

I was not trying to create a high performance interface, just a Tap interface to use standard applications and calls to send/receive traffic to the DPDK application. I did not expect anything other than some management-like interface in the application to use the Tap PMD.

> 
> Marc
> 
> On 12 December 2016 at 15:38, Keith Wiles <keith.wiles@intel.com> wrote:
> The PMD allows for DPDK and the host to communicate using a raw
> device interface on the host and in the DPDK application. The device
> created is a Tap device with a L2 packet header.
> 
> v12- Fixup minor changes for driver_name and version number
> v11- Add the tap.rst to the nic/index.rst file
> v10- Change the string name used to allow for multiple devices.
> v9 - Fix up the docs to use correct syntax
> v8 - Fix issue with tap_tx_queue_setup() not return zero on success.
> v7 - Reword the comment in common_base and fix the data->name issue
> v6 - fixed the checkpatch issues
> v5 - merge in changes from list review see related emails
>      fixed many minor edits
> v4 - merge with latest driver changes
> v3 - fix includes by removing ifdef for other type besides Linux
>      Fix the copyright notice in the Makefile
> v2 - merge all of the patches into one patch
>      Fix a typo on naming the tap device
>      Update the maintainers list
> 
> Signed-off-by: Keith Wiles <keith.wiles@intel.com>
> ---
>  MAINTAINERS                             |   5 +
>  config/common_base                      |   9 +
>  config/common_linuxapp                  |   1 +
>  doc/guides/nics/index.rst               |   1 +
>  doc/guides/nics/tap.rst                 | 136 ++++++
>  drivers/net/Makefile                    |   1 +
>  drivers/net/tap/Makefile                |  57 +++
>  drivers/net/tap/rte_eth_tap.c           | 765 ++++++++++++++++++++++++++++++++
>  drivers/net/tap/rte_pmd_tap_version.map |   4 +
>  mk/rte.app.mk                           |   1 +
>  10 files changed, 980 insertions(+)
>  create mode 100644 doc/guides/nics/tap.rst
>  create mode 100644 drivers/net/tap/Makefile
>  create mode 100644 drivers/net/tap/rte_eth_tap.c
>  create mode 100644 drivers/net/tap/rte_pmd_tap_version.map
> 
> diff --git a/MAINTAINERS b/MAINTAINERS
> index 26d9590..842fb6d 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -398,6 +398,11 @@ F: doc/guides/nics/pcap_ring.rst
>  F: app/test/test_pmd_ring.c
>  F: app/test/test_pmd_ring_perf.c
> 
> +Tap PMD
> +M: Keith Wiles <keith.wiles@intel.com>
> +F: drivers/net/tap
> +F: doc/guides/nics/tap.rst
> +
>  Null Networking PMD
>  M: Tetsuya Mukawa <mtetsuyah@gmail.com>
>  F: drivers/net/null/
> diff --git a/config/common_base b/config/common_base
> index 652a839..eb51cdb 100644
> --- a/config/common_base
> +++ b/config/common_base
> @@ -590,3 +590,12 @@ CONFIG_RTE_APP_TEST_RESOURCE_TAR=n
>  CONFIG_RTE_TEST_PMD=y
>  CONFIG_RTE_TEST_PMD_RECORD_CORE_CYCLES=n
>  CONFIG_RTE_TEST_PMD_RECORD_BURST_STATS=n
> +
> +#
> +# Compile the TAP PMD
> +#
> +# The TAP PMD is currently only built for Linux and the
> +# option is enabled by default in common_linuxapp file,
> +# set to 'n' in the common_base file.
> +#
> +CONFIG_RTE_LIBRTE_PMD_TAP=n
> diff --git a/config/common_linuxapp b/config/common_linuxapp
> index 2483dfa..782b503 100644
> --- a/config/common_linuxapp
> +++ b/config/common_linuxapp
> @@ -44,3 +44,4 @@ CONFIG_RTE_LIBRTE_PMD_VHOST=y
>  CONFIG_RTE_LIBRTE_PMD_AF_PACKET=y
>  CONFIG_RTE_LIBRTE_POWER=y
>  CONFIG_RTE_VIRTIO_USER=y
> +CONFIG_RTE_LIBRTE_PMD_TAP=y
> diff --git a/doc/guides/nics/index.rst b/doc/guides/nics/index.rst
> index 92d56a5..af92529 100644
> --- a/doc/guides/nics/index.rst
> +++ b/doc/guides/nics/index.rst
> @@ -51,6 +51,7 @@ Network Interface Controller Drivers
>      nfp
>      qede
>      szedata2
> +    tap
>      thunderx
>      virtio
>      vhost
> diff --git a/doc/guides/nics/tap.rst b/doc/guides/nics/tap.rst
> new file mode 100644
> index 0000000..622b9e7
> --- /dev/null
> +++ b/doc/guides/nics/tap.rst
> @@ -0,0 +1,136 @@
> +..  BSD LICENSE
> +    Copyright(c) 2016 Intel Corporation. All rights reserved.
> +    All rights reserved.
> +
> +    Redistribution and use in source and binary forms, with or without
> +    modification, are permitted provided that the following conditions
> +    are met:
> +
> +    * Redistributions of source code must retain the above copyright
> +    notice, this list of conditions and the following disclaimer.
> +    * Redistributions in binary form must reproduce the above copyright
> +    notice, this list of conditions and the following disclaimer in
> +    the documentation and/or other materials provided with the
> +    distribution.
> +    * Neither the name of Intel Corporation nor the names of its
> +    contributors may be used to endorse or promote products derived
> +    from this software without specific prior written permission.
> +
> +    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> +    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> +    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> +    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> +    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> +    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> +    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> +    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> +    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> +    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> +    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> +
> +Tun/Tap Poll Mode Driver
> +========================
> +
> +The ``rte_eth_tap.c`` PMD creates a device using TUN/TAP interfaces on the
> +local host. The PMD allows for DPDK and the host to communicate using a raw
> +device interface on the host and in the DPDK application.
> +
> +The device created is a TAP device, which sends/receives packet in a raw
> +format with a L2 header. The usage for a TAP PMD is for connectivity to the
> +local host using a TAP interface. When the TAP PMD is initialized it will
> +create a number of tap devices in the host, accessible via the ``ifconfig -a``
> +or ``ip`` commands. The commands can be used to assign and query the
> +virtual-like device.
> +
> +These TAP interfaces can be used with Wireshark or tcpdump or Pktgen-DPDK
> +along with being able to be used as a network connection to the DPDK
> +application. The method to enable one or more interfaces is to use the
> +``--vdev=net_tap`` option on the DPDK application command line. Each
> +``--vdev=net_tap`` option given will create an interface named dtap0, dtap1,
> +and so on.
> +
> +The interface name can be changed by adding the ``iface=foo0`` argument, for example::
> +
> +   --vdev=net_tap,iface=foo0 --vdev=net_tap,iface=foo1, ...
> +
> +Also the speed of the interface can be changed from 10G to whatever number
> +needed, but the interface does not enforce that speed, for example::
> +
> +   --vdev=net_tap,iface=foo0,speed=25000
> +
> +After the DPDK application is started you can send and receive packets on the
> +interface using the standard rx_burst/tx_burst APIs in DPDK. From the host
> +point of view you can use any host tool like tcpdump, Wireshark, ping, Pktgen
> +and others to communicate with the DPDK application. The DPDK application may
> +not understand network protocols like IPv4/6, UDP or TCP unless the
> +application has been written to understand these protocols.
> +
> +If you need the interface to act as a real network interface, meaning it is
> +running and has a valid IP address, you can do this with the following commands::
> +
> +   sudo ip link set dtap0 up; sudo ip addr add 192.168.0.250/24 dev dtap0
> +   sudo ip link set dtap1 up; sudo ip addr add 192.168.1.250/24 dev dtap1
> +
> +Please change the IP addresses as you see fit.
> +
> +If routing is enabled on the host you can also communicate with the DPDK App
> +over the internet via a standard socket layer application as long as you
> +account for the protocol handling in the application.
> +
> +If you have a Network Stack in your DPDK application or something like it you
> +can utilize that stack to handle the network protocols. Plus you would be able
> +to address the interface using an IP address assigned to the internal
> +interface.
> +
> +Example
> +-------
> +
> +The following is a simple example of using the TUN/TAP PMD with the Pktgen
> +packet generator. It requires that the ``socat`` utility is installed on the
> +test system.
> +
> +Build DPDK, then pull down Pktgen and build pktgen using the DPDK SDK/Target
> +used to build the dpdk you pulled down.
> +
> +Run pktgen from the pktgen directory in a terminal with a commandline like the
> +following::
> +
> +    sudo ./app/app/x86_64-native-linuxapp-gcc/app/pktgen -l 1-5 -n 4        \
> +     --proc-type auto --log-level 8 --socket-mem 512,512 --file-prefix pg   \
> +     --vdev=net_tap --vdev=net_tap -b 05:00.0 -b 05:00.1                    \
> +     -b 04:00.0 -b 04:00.1 -b 04:00.2 -b 04:00.3                            \
> +     -b 81:00.0 -b 81:00.1 -b 81:00.2 -b 81:00.3                            \
> +     -b 82:00.0 -b 83:00.0 -- -T -P -m [2:3].0 -m [4:5].1                   \
> +     -f themes/black-yellow.theme
> +
> +.. Note::
> +
> +   Change the ``-b`` options to blacklist all of your physical ports. The
> +   command line above is all one line.
> +
> +   Also, ``-f themes/black-yellow.theme`` is optional if the default colors
> +   work on your system configuration. See the Pktgen docs for more
> +   information.
> +
> +Verify with the ``ifconfig -a`` command in a different xterm window; the
> +``dtap0`` and ``dtap1`` interfaces should have been created.
> +
> +Next set the links for the two interfaces to up via the commands below::
> +
> +    sudo ip link set dtap0 up; sudo ip addr add 192.168.0.250/24 dev dtap0
> +    sudo ip link set dtap1 up; sudo ip addr add 192.168.1.250/24 dev dtap1
> +
> +Then use socat to create a loopback for the two interfaces::
> +
> +    sudo socat interface:dtap0 interface:dtap1
> +
> +Then on the Pktgen command line interface you can start sending packets using
> +the commands ``start 0`` and ``start 1`` or you can start both at the same
> +time with ``start all``. The command ``str`` is an alias for ``start all`` and
> +``stp`` is an alias for ``stop all``.
> +
> +While running you should see the 64 byte counters increasing to verify the
> +traffic is being looped back. You can use ``set all size XXX`` to change the
> +size of the packets after you stop the traffic. Use the pktgen ``help``
> +command to see a list of all commands. You can also use the ``-f`` option to
> +load commands at startup.
> diff --git a/drivers/net/Makefile b/drivers/net/Makefile
> index bc93230..e366a85 100644
> --- a/drivers/net/Makefile
> +++ b/drivers/net/Makefile
> @@ -51,6 +51,7 @@ DIRS-$(CONFIG_RTE_LIBRTE_PMD_PCAP) += pcap
>  DIRS-$(CONFIG_RTE_LIBRTE_QEDE_PMD) += qede
>  DIRS-$(CONFIG_RTE_LIBRTE_PMD_RING) += ring
>  DIRS-$(CONFIG_RTE_LIBRTE_PMD_SZEDATA2) += szedata2
> +DIRS-$(CONFIG_RTE_LIBRTE_PMD_TAP) += tap
>  DIRS-$(CONFIG_RTE_LIBRTE_THUNDERX_NICVF_PMD) += thunderx
>  DIRS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio
>  DIRS-$(CONFIG_RTE_LIBRTE_VMXNET3_PMD) += vmxnet3
> diff --git a/drivers/net/tap/Makefile b/drivers/net/tap/Makefile
> new file mode 100644
> index 0000000..e18f30c
> --- /dev/null
> +++ b/drivers/net/tap/Makefile
> @@ -0,0 +1,57 @@
> +#   BSD LICENSE
> +#
> +#   Copyright(c) 2016 Intel Corporation. All rights reserved.
> +#
> +#   Redistribution and use in source and binary forms, with or without
> +#   modification, are permitted provided that the following conditions
> +#   are met:
> +#
> +#     * Redistributions of source code must retain the above copyright
> +#       notice, this list of conditions and the following disclaimer.
> +#     * Redistributions in binary form must reproduce the above copyright
> +#       notice, this list of conditions and the following disclaimer in
> +#       the documentation and/or other materials provided with the
> +#       distribution.
> +#     * Neither the name of Intel Corporation nor the names of its
> +#       contributors may be used to endorse or promote products derived
> +#       from this software without specific prior written permission.
> +#
> +#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> +#   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> +#   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> +#   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> +#   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> +#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> +#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> +#   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> +#   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> +#   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> +#   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> +
> +include $(RTE_SDK)/mk/rte.vars.mk
> +
> +#
> +# library name
> +#
> +LIB = librte_pmd_tap.a
> +
> +EXPORT_MAP := rte_pmd_tap_version.map
> +
> +LIBABIVER := 1
> +
> +CFLAGS += -O3
> +CFLAGS += $(WERROR_FLAGS)
> +
> +#
> +# all source are stored in SRCS-y
> +#
> +SRCS-$(CONFIG_RTE_LIBRTE_PMD_TAP) += rte_eth_tap.c
> +
> +# this lib depends upon:
> +DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_TAP) += lib/librte_eal
> +DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_TAP) += lib/librte_mbuf
> +DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_TAP) += lib/librte_mempool
> +DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_TAP) += lib/librte_ether
> +DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_TAP) += lib/librte_kvargs
> +
> +include $(RTE_SDK)/mk/rte.lib.mk
> diff --git a/drivers/net/tap/rte_eth_tap.c b/drivers/net/tap/rte_eth_tap.c
> new file mode 100644
> index 0000000..976f2d9
> --- /dev/null
> +++ b/drivers/net/tap/rte_eth_tap.c
> @@ -0,0 +1,765 @@
> +/*-
> + *   BSD LICENSE
> + *
> + *   Copyright(c) 2016 Intel Corporation. All rights reserved.
> + *   All rights reserved.
> + *
> + *   Redistribution and use in source and binary forms, with or without
> + *   modification, are permitted provided that the following conditions
> + *   are met:
> + *
> + *     * Redistributions of source code must retain the above copyright
> + *       notice, this list of conditions and the following disclaimer.
> + *     * Redistributions in binary form must reproduce the above copyright
> + *       notice, this list of conditions and the following disclaimer in
> + *       the documentation and/or other materials provided with the
> + *       distribution.
> + *     * Neither the name of Intel Corporation nor the names of its
> + *       contributors may be used to endorse or promote products derived
> + *       from this software without specific prior written permission.
> + *
> + *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> + *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> + *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> + *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> + *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> + *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> + *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> + *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> + *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> + *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> + *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> + */
> +
> +#include <rte_mbuf.h>
> +#include <rte_ethdev.h>
> +#include <rte_malloc.h>
> +#include <rte_vdev.h>
> +#include <rte_kvargs.h>
> +
> +#include <sys/types.h>
> +#include <sys/stat.h>
> +#include <sys/socket.h>
> +#include <sys/ioctl.h>
> +#include <sys/mman.h>
> +#include <unistd.h>
> +#include <poll.h>
> +#include <arpa/inet.h>
> +#include <linux/if.h>
> +#include <linux/if_tun.h>
> +#include <linux/if_ether.h>
> +#include <fcntl.h>
> +
> +/* Linux based path to the TUN device */
> +#define TUN_TAP_DEV_PATH        "/dev/net/tun"
> +#define DEFAULT_TAP_NAME        "dtap"
> +
> +#define ETH_TAP_IFACE_ARG       "iface"
> +#define ETH_TAP_SPEED_ARG       "speed"
> +
> +#define RTE_PMD_TAP_MAX_QUEUES 16
> +
> +static struct rte_vdev_driver pmd_tap_drv;
> +
> +static const char *valid_arguments[] = {
> +       ETH_TAP_IFACE_ARG,
> +       ETH_TAP_SPEED_ARG,
> +       NULL
> +};
> +
> +static int tap_unit;
> +
> +static struct rte_eth_link pmd_link = {
> +       .link_speed = ETH_SPEED_NUM_10G,
> +       .link_duplex = ETH_LINK_FULL_DUPLEX,
> +       .link_status = ETH_LINK_DOWN,
> +       .link_autoneg = ETH_LINK_SPEED_AUTONEG
> +};
> +
> +struct pkt_stats {
> +       uint64_t opackets;              /* Number of output packets */
> +       uint64_t ipackets;              /* Number of input packets */
> +       uint64_t obytes;                /* Number of bytes on output */
> +       uint64_t ibytes;                /* Number of bytes on input */
> +       uint64_t errs;                  /* Number of error packets */
> +};
> +
> +struct rx_queue {
> +       struct rte_mempool *mp;         /* Mempool for RX packets */
> +       uint16_t in_port;               /* Port ID */
> +       int fd;
> +
> +       struct pkt_stats stats;         /* Stats for this RX queue */
> +};
> +
> +struct tx_queue {
> +       int fd;
> +       struct pkt_stats stats;         /* Stats for this TX queue */
> +};
> +
> +struct pmd_internals {
> +       char name[RTE_ETH_NAME_MAX_LEN];        /* Internal Tap device name */
> +       uint16_t nb_queues;             /* Number of queues supported */
> +       struct ether_addr eth_addr;     /* Mac address of the device port */
> +
> +       int if_index;                   /* IF_INDEX for the port */
> +       int fds[RTE_PMD_TAP_MAX_QUEUES]; /* List of all file descriptors */
> +
> +       struct rx_queue rxq[RTE_PMD_TAP_MAX_QUEUES];    /* List of RX queues */
> +       struct tx_queue txq[RTE_PMD_TAP_MAX_QUEUES];    /* List of TX queues */
> +};
> +
> +/* Tun/Tap allocation routine
> + *
> + * name is the name of the interface to use; if NULL or empty, the host
> + * supplied name is taken.
> + */
> +static int
> +tun_alloc(char *name)
> +{
> +       struct ifreq ifr;
> +       unsigned int features;
> +       int fd;
> +
> +       memset(&ifr, 0, sizeof(struct ifreq));
> +
> +       ifr.ifr_flags = IFF_TAP | IFF_NO_PI;
> +       if (name && name[0])
> +               strncpy(ifr.ifr_name, name, IFNAMSIZ);
> +
> +       fd = open(TUN_TAP_DEV_PATH, O_RDWR);
> +       if (fd < 0) {
> +               RTE_LOG(ERR, PMD, "Unable to create TAP interface");
> +               goto error;
> +       }
> +
> +       /* Grab the TUN features to verify we can work */
> +       if (ioctl(fd, TUNGETFEATURES, &features) < 0) {
> +               RTE_LOG(ERR, PMD, "Unable to get TUN/TAP features\n");
> +               goto error;
> +       }
> +       RTE_LOG(DEBUG, PMD, "TUN/TAP Features %08x\n", features);
> +
> +       if (!(features & IFF_MULTI_QUEUE) && (RTE_PMD_TAP_MAX_QUEUES > 1)) {
> +               RTE_LOG(DEBUG, PMD, "TUN/TAP device only one queue\n");
> +               goto error;
> +       } else if ((features & IFF_ONE_QUEUE) &&
> +                       (RTE_PMD_TAP_MAX_QUEUES == 1)) {
> +               ifr.ifr_flags |= IFF_ONE_QUEUE;
> +               RTE_LOG(DEBUG, PMD, "Single queue only support\n");
> +       } else {
> +               ifr.ifr_flags |= IFF_MULTI_QUEUE;
> +               RTE_LOG(DEBUG, PMD, "Multi-queue support for %d queues\n",
> +                       RTE_PMD_TAP_MAX_QUEUES);
> +       }
> +
> +       /* Set the TUN/TAP configuration and get the name if needed */
> +       if (ioctl(fd, TUNSETIFF, (void *)&ifr) < 0) {
> +               RTE_LOG(ERR, PMD, "Unable to set TUNSETIFF for %s\n",
> +                       ifr.ifr_name);
> +               perror("TUNSETIFF");
> +               goto error;
> +       }
> +
> +       /* Always set the file descriptor to non-blocking */
> +       if (fcntl(fd, F_SETFL, O_NONBLOCK) < 0) {
> +               RTE_LOG(ERR, PMD, "Unable to set to nonblocking\n");
> +               perror("F_SETFL, NONBLOCK");
> +               goto error;
> +       }
> +
> +       /* If the name is different that new name as default */
> +       if (name && strcmp(name, ifr.ifr_name))
> +               snprintf(name, RTE_ETH_NAME_MAX_LEN - 1, "%s", ifr.ifr_name);
> +
> +       return fd;
> +
> +error:
> +       if (fd > 0)
> +               close(fd);
> +       return -1;
> +}
> +
> +/* Callback to handle the rx burst of packets to the correct interface and
> + * file descriptor(s) in a multi-queue setup.
> + */
> +static uint16_t
> +pmd_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
> +{
> +       int len;
> +       struct rte_mbuf *mbuf;
> +       struct rx_queue *rxq = queue;
> +       uint16_t num_rx;
> +       unsigned long num_rx_bytes = 0;
> +
> +       for (num_rx = 0; num_rx < nb_pkts; ) {
> +               /* allocate the next mbuf */
> +               mbuf = rte_pktmbuf_alloc(rxq->mp);
> +               if (unlikely(!mbuf)) {
> +                       RTE_LOG(WARNING, PMD, "Unable to allocate mbuf\n");
> +                       break;
> +               }
> +
> +               len = read(rxq->fd, rte_pktmbuf_mtod(mbuf, char *),
> +                          rte_pktmbuf_tailroom(mbuf));
> +               if (len <= 0) {
> +                       rte_pktmbuf_free(mbuf);
> +                       break;
> +               }
> +
> +               mbuf->data_len = len;
> +               mbuf->pkt_len = len;
> +               mbuf->port = rxq->in_port;
> +
> +               /* account for the receive frame */
> +               bufs[num_rx++] = mbuf;
> +               num_rx_bytes += mbuf->pkt_len;
> +       }
> +       rxq->stats.ipackets += num_rx;
> +       rxq->stats.ibytes += num_rx_bytes;
> +
> +       return num_rx;
> +}
> +
> +/* Callback to handle sending packets from the tap interface
> + */
> +static uint16_t
> +pmd_tx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
> +{
> +       struct rte_mbuf *mbuf;
> +       struct tx_queue *txq = queue;
> +       struct pollfd pfd;
> +       uint16_t num_tx = 0;
> +       unsigned long num_tx_bytes = 0;
> +       int i, n;
> +
> +       if (unlikely(nb_pkts == 0))
> +               return 0;
> +
> +       pfd.events = POLLOUT;
> +       pfd.fd = txq->fd;
> +       for (i = 0; i < nb_pkts; i++) {
> +               n = poll(&pfd, 1, 0);
> +
> +               if (n <= 0)
> +                       break;
> +
> +               if (pfd.revents & POLLOUT) {
> +                       /* copy the tx frame data */
> +                       mbuf = bufs[num_tx];
> +                       n = write(pfd.fd, rte_pktmbuf_mtod(mbuf, void*),
> +                                 rte_pktmbuf_pkt_len(mbuf));
> +                       if (n <= 0)
> +                               break;
> +
> +                       num_tx++;
> +                       num_tx_bytes += mbuf->pkt_len;
> +                       rte_pktmbuf_free(mbuf);
> +               }
> +       }
> +
> +       txq->stats.opackets += num_tx;
> +       txq->stats.errs += nb_pkts - num_tx;
> +       txq->stats.obytes += num_tx_bytes;
> +
> +       return num_tx;
> +}
> +
> +static int
> +tap_dev_start(struct rte_eth_dev *dev)
> +{
> +       /* Force the Link up */
> +       dev->data->dev_link.link_status = ETH_LINK_UP;
> +
> +       return 0;
> +}
> +
> +/* This function gets called when the current port gets stopped.
> + */
> +static void
> +tap_dev_stop(struct rte_eth_dev *dev)
> +{
> +       int i;
> +       struct pmd_internals *internals = dev->data->dev_private;
> +
> +       for (i = 0; i < internals->nb_queues; i++)
> +               if (internals->fds[i] != -1)
> +                       close(internals->fds[i]);
> +
> +       dev->data->dev_link.link_status = ETH_LINK_DOWN;
> +}
> +
> +static int
> +tap_dev_configure(struct rte_eth_dev *dev __rte_unused)
> +{
> +       return 0;
> +}
> +
> +static void
> +tap_dev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
> +{
> +       struct pmd_internals *internals = dev->data->dev_private;
> +
> +       dev_info->if_index = internals->if_index;
> +       dev_info->max_mac_addrs = 1;
> +       dev_info->max_rx_pktlen = (uint32_t)ETHER_MAX_VLAN_FRAME_LEN;
> +       dev_info->max_rx_queues = internals->nb_queues;
> +       dev_info->max_tx_queues = internals->nb_queues;
> +       dev_info->min_rx_bufsize = 0;
> +       dev_info->pci_dev = NULL;
> +}
> +
> +static void
> +tap_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *tap_stats)
> +{
> +       unsigned int i, imax;
> +       unsigned long rx_total = 0, tx_total = 0, tx_err_total = 0;
> +       unsigned long rx_bytes_total = 0, tx_bytes_total = 0;
> +       const struct pmd_internals *pmd = dev->data->dev_private;
> +
> +       imax = (pmd->nb_queues < RTE_ETHDEV_QUEUE_STAT_CNTRS) ?
> +               pmd->nb_queues : RTE_ETHDEV_QUEUE_STAT_CNTRS;
> +
> +       for (i = 0; i < imax; i++) {
> +               tap_stats->q_ipackets[i] = pmd->rxq[i].stats.ipackets;
> +               tap_stats->q_ibytes[i] = pmd->rxq[i].stats.ibytes;
> +               rx_total += tap_stats->q_ipackets[i];
> +               rx_bytes_total += tap_stats->q_ibytes[i];
> +       }
> +
> +       for (i = 0; i < imax; i++) {
> +               tap_stats->q_opackets[i] = pmd->txq[i].stats.opackets;
> +               tap_stats->q_errors[i] = pmd->txq[i].stats.errs;
> +               tap_stats->q_obytes[i] = pmd->txq[i].stats.obytes;
> +               tx_total += tap_stats->q_opackets[i];
> +               tx_err_total += tap_stats->q_errors[i];
> +               tx_bytes_total += tap_stats->q_obytes[i];
> +       }
> +
> +       tap_stats->ipackets = rx_total;
> +       tap_stats->ibytes = rx_bytes_total;
> +       tap_stats->opackets = tx_total;
> +       tap_stats->oerrors = tx_err_total;
> +       tap_stats->obytes = tx_bytes_total;
> +}
> +
> +static void
> +tap_stats_reset(struct rte_eth_dev *dev)
> +{
> +       int i;
> +       struct pmd_internals *pmd = dev->data->dev_private;
> +
> +       for (i = 0; i < pmd->nb_queues; i++) {
> +               pmd->rxq[i].stats.ipackets = 0;
> +               pmd->rxq[i].stats.ibytes = 0;
> +       }
> +
> +       for (i = 0; i < pmd->nb_queues; i++) {
> +               pmd->txq[i].stats.opackets = 0;
> +               pmd->txq[i].stats.errs = 0;
> +               pmd->txq[i].stats.obytes = 0;
> +       }
> +}
> +
> +static void
> +tap_dev_close(struct rte_eth_dev *dev __rte_unused)
> +{
> +}
> +
> +static void
> +tap_rx_queue_release(void *queue)
> +{
> +       struct rx_queue *rxq = queue;
> +
> +       if (rxq && (rxq->fd > 0)) {
> +               close(rxq->fd);
> +               rxq->fd = -1;
> +       }
> +}
> +
> +static void
> +tap_tx_queue_release(void *queue)
> +{
> +       struct tx_queue *txq = queue;
> +
> +       if (txq && (txq->fd > 0)) {
> +               close(txq->fd);
> +               txq->fd = -1;
> +       }
> +}
> +
> +static int
> +tap_link_update(struct rte_eth_dev *dev __rte_unused,
> +               int wait_to_complete __rte_unused)
> +{
> +       return 0;
> +}
> +
> +static int
> +tap_setup_queue(struct rte_eth_dev *dev,
> +               struct pmd_internals *internals,
> +               uint16_t qid)
> +{
> +       struct rx_queue *rx = &internals->rxq[qid];
> +       struct tx_queue *tx = &internals->txq[qid];
> +       int fd;
> +
> +       fd = rx->fd;
> +       if (fd < 0) {
> +               fd = tx->fd;
> +               if (fd < 0) {
> +                       RTE_LOG(INFO, PMD, "Add queue to TAP %s for qid %d\n",
> +                               dev->data->name, qid);
> +                       fd = tun_alloc(dev->data->name);
> +                       if (fd < 0) {
> +                               RTE_LOG(ERR, PMD, "tun_alloc(%s) failed\n",
> +                                       dev->data->name);
> +                               return -1;
> +                       }
> +               }
> +       }
> +       dev->data->rx_queues[qid] = rx;
> +       dev->data->tx_queues[qid] = tx;
> +
> +       rx->fd = fd;
> +       tx->fd = fd;
> +
> +       return fd;
> +}
> +
> +static int
> +tap_rx_queue_setup(struct rte_eth_dev *dev,
> +                  uint16_t rx_queue_id,
> +                  uint16_t nb_rx_desc __rte_unused,
> +                  unsigned int socket_id __rte_unused,
> +                  const struct rte_eth_rxconf *rx_conf __rte_unused,
> +                  struct rte_mempool *mp)
> +{
> +       struct pmd_internals *internals = dev->data->dev_private;
> +       uint16_t buf_size;
> +       int fd;
> +
> +       if ((rx_queue_id >= internals->nb_queues) || !mp) {
> +               RTE_LOG(ERR, PMD, "nb_queues %d mp %p\n",
> +                       internals->nb_queues, mp);
> +               return -1;
> +       }
> +
> +       internals->rxq[rx_queue_id].mp = mp;
> +       internals->rxq[rx_queue_id].in_port = dev->data->port_id;
> +
> +       /* Now get the space available for data in the mbuf */
> +       buf_size = (uint16_t)(rte_pktmbuf_data_room_size(mp) -
> +                               RTE_PKTMBUF_HEADROOM);
> +
> +       if (buf_size < ETH_FRAME_LEN) {
> +               RTE_LOG(ERR, PMD,
> +                       "%s: %d bytes will not fit in mbuf (%d bytes)\n",
> +                       dev->data->name, ETH_FRAME_LEN, buf_size);
> +               return -ENOMEM;
> +       }
> +
> +       fd = tap_setup_queue(dev, internals, rx_queue_id);
> +       if (fd == -1)
> +               return -1;
> +
> +       internals->fds[rx_queue_id] = fd;
> +       RTE_LOG(INFO, PMD, "RX TAP device name %s, qid %d on fd %d\n",
> +               dev->data->name, rx_queue_id, internals->rxq[rx_queue_id].fd);
> +
> +       return 0;
> +}
> +
> +static int
> +tap_tx_queue_setup(struct rte_eth_dev *dev,
> +                  uint16_t tx_queue_id,
> +                  uint16_t nb_tx_desc __rte_unused,
> +                  unsigned int socket_id __rte_unused,
> +                  const struct rte_eth_txconf *tx_conf __rte_unused)
> +{
> +       struct pmd_internals *internals = dev->data->dev_private;
> +       int ret;
> +
> +       if (tx_queue_id >= internals->nb_queues)
> +               return -1;
> +
> +       ret = tap_setup_queue(dev, internals, tx_queue_id);
> +       if (ret == -1)
> +               return -1;
> +
> +       RTE_LOG(INFO, PMD, "TX TAP device name %s, qid %d on fd %d\n",
> +               dev->data->name, tx_queue_id, internals->txq[tx_queue_id].fd);
> +
> +       return 0;
> +}
> +
> +static const struct eth_dev_ops ops = {
> +       .dev_start              = tap_dev_start,
> +       .dev_stop               = tap_dev_stop,
> +       .dev_close              = tap_dev_close,
> +       .dev_configure          = tap_dev_configure,
> +       .dev_infos_get          = tap_dev_info,
> +       .rx_queue_setup         = tap_rx_queue_setup,
> +       .tx_queue_setup         = tap_tx_queue_setup,
> +       .rx_queue_release       = tap_rx_queue_release,
> +       .tx_queue_release       = tap_tx_queue_release,
> +       .link_update            = tap_link_update,
> +       .stats_get              = tap_stats_get,
> +       .stats_reset            = tap_stats_reset,
> +};
> +
> +static int
> +pmd_mac_address(int fd, struct rte_eth_dev *dev, struct ether_addr *addr)
> +{
> +       struct ifreq ifr;
> +
> +       if ((fd <= 0) || !dev || !addr)
> +               return -1;
> +
> +       memset(&ifr, 0, sizeof(ifr));
> +
> +       if (ioctl(fd, SIOCGIFHWADDR, &ifr) == -1) {
> +               RTE_LOG(ERR, PMD, "ioctl failed (SIOCGIFHWADDR) (%s)\n",
> +                       ifr.ifr_name);
> +               return -1;
> +       }
> +
> +       /* Set the host based MAC address to this special MAC format */
> +       ifr.ifr_hwaddr.sa_data[0] = 'T';
> +       ifr.ifr_hwaddr.sa_data[1] = 'a';
> +       ifr.ifr_hwaddr.sa_data[2] = 'p';
> +       ifr.ifr_hwaddr.sa_data[3] = '-';
> +       ifr.ifr_hwaddr.sa_data[4] = dev->data->port_id;
> +       ifr.ifr_hwaddr.sa_data[5] = dev->data->numa_node;
> +       if (ioctl(fd, SIOCSIFHWADDR, &ifr) == -1) {
> +               RTE_LOG(ERR, PMD, "%s: ioctl failed (SIOCSIFHWADDR) (%s)\n",
> +                       dev->data->name, ifr.ifr_name);
> +               return -1;
> +       }
> +
> +       /* Set the local application MAC address, needs to be different then
> +        * the host based MAC address.
> +        */
> +       ifr.ifr_hwaddr.sa_data[0] = 'd';
> +       ifr.ifr_hwaddr.sa_data[1] = 'n';
> +       ifr.ifr_hwaddr.sa_data[2] = 'e';
> +       ifr.ifr_hwaddr.sa_data[3] = 't';
> +       ifr.ifr_hwaddr.sa_data[4] = dev->data->port_id;
> +       ifr.ifr_hwaddr.sa_data[5] = dev->data->numa_node;
> +       rte_memcpy(addr, ifr.ifr_hwaddr.sa_data, ETH_ALEN);
> +
> +       return 0;
> +}
> +
> +static int
> +eth_dev_tap_create(const char *name, char *tap_name)
> +{
> +       int numa_node = rte_socket_id();
> +       struct rte_eth_dev *dev = NULL;
> +       struct pmd_internals *pmd = NULL;
> +       struct rte_eth_dev_data *data = NULL;
> +       int i, fd = -1;
> +
> +       RTE_LOG(INFO, PMD,
> +               "%s: Create TAP Ethernet device with %d queues on numa %u\n",
> +                name, RTE_PMD_TAP_MAX_QUEUES, rte_socket_id());
> +
> +       data = rte_zmalloc_socket(tap_name, sizeof(*data), 0, numa_node);
> +       if (!data) {
> +               RTE_LOG(INFO, PMD, "Failed to allocate data\n");
> +               goto error_exit;
> +       }
> +
> +       pmd = rte_zmalloc_socket(tap_name, sizeof(*pmd), 0, numa_node);
> +       if (!pmd) {
> +               RTE_LOG(INFO, PMD, "Unable to allocate internal struct\n");
> +               goto error_exit;
> +       }
> +
> +       /* Use the name and not the tap_name */
> +       dev = rte_eth_dev_allocate(tap_name);
> +       if (!dev) {
> +               RTE_LOG(INFO, PMD, "Unable to allocate device struct\n");
> +               goto error_exit;
> +       }
> +
> +       snprintf(pmd->name, sizeof(pmd->name), "%s", tap_name);
> +
> +       pmd->nb_queues = RTE_PMD_TAP_MAX_QUEUES;
> +
> +       /* Setup some default values */
> +       data->dev_private = pmd;
> +       data->port_id = dev->data->port_id;
> +       data->dev_flags = RTE_ETH_DEV_DETACHABLE;
> +       data->kdrv = RTE_KDRV_NONE;
> +       data->drv_name = pmd_tap_drv.driver.name;
> +       data->numa_node = numa_node;
> +
> +       data->dev_link = pmd_link;
> +       data->mac_addrs = &pmd->eth_addr;
> +       data->nb_rx_queues = pmd->nb_queues;
> +       data->nb_tx_queues = pmd->nb_queues;
> +
> +       dev->data = data;
> +       dev->dev_ops = &ops;
> +       dev->driver = NULL;
> +       dev->rx_pkt_burst = pmd_rx_burst;
> +       dev->tx_pkt_burst = pmd_tx_burst;
> +       snprintf(dev->data->name, sizeof(dev->data->name), "%s", name);
> +
> +       /* Create the first Tap device */
> +       fd = tun_alloc(tap_name);
> +       if (fd < 0) {
> +               RTE_LOG(INFO, PMD, "tun_alloc() failed\n");
> +               goto error_exit;
> +       }
> +
> +       /* Presetup the fds to -1 as being not working */
> +       for (i = 0; i < RTE_PMD_TAP_MAX_QUEUES; i++) {
> +               pmd->fds[i] = -1;
> +               pmd->rxq[i].fd = -1;
> +               pmd->txq[i].fd = -1;
> +       }
> +
> +       /* Take the TUN/TAP fd and place in the first location */
> +       pmd->rxq[0].fd = fd;
> +       pmd->txq[0].fd = fd;
> +       pmd->fds[0] = fd;
> +
> +       if (pmd_mac_address(fd, dev, &pmd->eth_addr) < 0) {
> +               RTE_LOG(INFO, PMD, "Unable to get MAC address\n");
> +               goto error_exit;
> +       }
> +
> +       return 0;
> +
> +error_exit:
> +       RTE_PMD_DEBUG_TRACE("Unable to initialize %s\n", name);
> +
> +       rte_free(data);
> +       rte_free(pmd);
> +
> +       rte_eth_dev_release_port(dev);
> +
> +       return -EINVAL;
> +}
> +
> +static int
> +set_interface_name(const char *key __rte_unused,
> +                  const char *value,
> +                  void *extra_args)
> +{
> +       char *name = (char *)extra_args;
> +
> +       if (value)
> +               snprintf(name, RTE_ETH_NAME_MAX_LEN - 1, "%s", value);
> +       else
> +               snprintf(name, RTE_ETH_NAME_MAX_LEN - 1, "%s%d",
> +                        DEFAULT_TAP_NAME, (tap_unit - 1));
> +
> +       return 0;
> +}
> +
> +static int
> +set_interface_speed(const char *key __rte_unused,
> +                   const char *value,
> +                   void *extra_args)
> +{
> +       *(int *)extra_args = (value) ? atoi(value) : ETH_SPEED_NUM_10G;
> +
> +       return 0;
> +}
> +
> +/* Open a TAP interface device.
> + */
> +static int
> +rte_pmd_tap_probe(const char *name, const char *params)
> +{
> +       int ret;
> +       struct rte_kvargs *kvlist = NULL;
> +       int speed;
> +       char tap_name[RTE_ETH_NAME_MAX_LEN];
> +
> +       speed = ETH_SPEED_NUM_10G;
> +       snprintf(tap_name, sizeof(tap_name), "%s%d",
> +                DEFAULT_TAP_NAME, tap_unit++);
> +
> +       RTE_LOG(INFO, PMD, "Initializing pmd_tap for %s as %s\n",
> +               name, tap_name);
> +
> +       if (params && (params[0] != '\0')) {
> +               RTE_LOG(INFO, PMD, "paramaters (%s)\n", params);
> +
> +               kvlist = rte_kvargs_parse(params, valid_arguments);
> +               if (kvlist) {
> +                       if (rte_kvargs_count(kvlist, ETH_TAP_SPEED_ARG) == 1) {
> +                               ret = rte_kvargs_process(kvlist,
> +                                                        ETH_TAP_SPEED_ARG,
> +                                                        &set_interface_speed,
> +                                                        &speed);
> +                               if (ret == -1)
> +                                       goto leave;
> +                       }
> +
> +                       if (rte_kvargs_count(kvlist, ETH_TAP_IFACE_ARG) == 1) {
> +                               ret = rte_kvargs_process(kvlist,
> +                                                        ETH_TAP_IFACE_ARG,
> +                                                        &set_interface_name,
> +                                                        tap_name);
> +                               if (ret == -1)
> +                                       goto leave;
> +                       }
> +               }
> +       }
> +       pmd_link.link_speed = speed;
> +
> +       ret = eth_dev_tap_create(name, tap_name);
> +
> +leave:
> +       if (ret == -1) {
> +               RTE_LOG(INFO, PMD, "Failed to create pmd for %s as %s\n",
> +                       name, tap_name);
> +               tap_unit--;             /* Restore the unit number */
> +       }
> +       rte_kvargs_free(kvlist);
> +
> +       return ret;
> +}
> +
> +/* detach a TAP device.
> + */
> +static int
> +rte_pmd_tap_remove(const char *name)
> +{
> +       struct rte_eth_dev *eth_dev = NULL;
> +       struct pmd_internals *internals;
> +       int i;
> +
> +       RTE_LOG(INFO, PMD, "Closing TUN/TAP Ethernet device on numa %u\n",
> +               rte_socket_id());
> +
> +       /* find the ethdev entry */
> +       eth_dev = rte_eth_dev_allocated(name);
> +       if (!eth_dev)
> +               return 0;
> +
> +       internals = eth_dev->data->dev_private;
> +       for (i = 0; i < internals->nb_queues; i++)
> +               if (internals->fds[i] != -1)
> +                       close(internals->fds[i]);
> +
> +       rte_free(eth_dev->data->dev_private);
> +       rte_free(eth_dev->data);
> +
> +       rte_eth_dev_release_port(eth_dev);
> +
> +       return 0;
> +}
> +
> +static struct rte_vdev_driver pmd_tap_drv = {
> +       .probe = rte_pmd_tap_probe,
> +       .remove = rte_pmd_tap_remove,
> +};
> +RTE_PMD_REGISTER_VDEV(net_tap, pmd_tap_drv);
> +RTE_PMD_REGISTER_ALIAS(net_tap, eth_tap);
> +RTE_PMD_REGISTER_PARAM_STRING(net_tap, "iface=<string>,speed=N");
> diff --git a/drivers/net/tap/rte_pmd_tap_version.map b/drivers/net/tap/rte_pmd_tap_version.map
> new file mode 100644
> index 0000000..31eca32
> --- /dev/null
> +++ b/drivers/net/tap/rte_pmd_tap_version.map
> @@ -0,0 +1,4 @@
> +DPDK_17.02 {
> +
> +       local: *;
> +};
> diff --git a/mk/rte.app.mk b/mk/rte.app.mk
> index f75f0e2..02c32ae 100644
> --- a/mk/rte.app.mk
> +++ b/mk/rte.app.mk
> @@ -124,6 +124,7 @@ _LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_PCAP)       += -lrte_pmd_pcap -lpcap
>  _LDLIBS-$(CONFIG_RTE_LIBRTE_QEDE_PMD)       += -lrte_pmd_qede
>  _LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_RING)       += -lrte_pmd_ring
>  _LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_SZEDATA2)   += -lrte_pmd_szedata2 -lsze2
> +_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_TAP)        += -lrte_pmd_tap
>  _LDLIBS-$(CONFIG_RTE_LIBRTE_THUNDERX_NICVF_PMD) += -lrte_pmd_thunderx_nicvf -lm
>  _LDLIBS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD)     += -lrte_pmd_virtio
>  ifeq ($(CONFIG_RTE_LIBRTE_VHOST),y)
> --
> 2.8.0.GIT
> 
> 

Regards,
Keith

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [PATCH v12] net/tap: new TUN/TAP device PMD
  2016-12-12 14:38       ` Keith Wiles
  2016-12-12 19:13         ` Marc
@ 2016-12-13 13:54         ` Ferruh Yigit
  2017-01-20 12:14           ` Ferruh Yigit
  1 sibling, 1 reply; 59+ messages in thread
From: Ferruh Yigit @ 2016-12-13 13:54 UTC (permalink / raw)
  To: Keith Wiles, dev; +Cc: pmatilai, yuanhan.liu, john.mcnamara

On 12/12/2016 2:38 PM, Keith Wiles wrote:
> The PMD allows for DPDK and the host to communicate using a raw
> device interface on the host and in the DPDK application. The device
> created is a Tap device with a L2 packet header.
> 
> v12- Fixup minor changes for driver_name and version number
> v11- Add the tap.rst to the nic/index.rst file
> v10- Change the string name used to allow for multiple devices.
> v9 - Fix up the docs to use correct syntax
> v8 - Fix issue with tap_tx_queue_setup() not return zero on success.
> v7 - Reword the comment in common_base and fix the data->name issue
> v6 - fixed the checkpatch issues
> v5 - merge in changes from list review see related emails
>      fixed many minor edits
> v4 - merge with latest driver changes
> v3 - fix includes by removing ifdef for other type besides Linux
>      Fix the copyright notice in the Makefile
> v2 - merge all of the patches into one patch
>      Fix a typo on naming the tap device
>      Update the maintainers list
> 
> Signed-off-by: Keith Wiles <keith.wiles@intel.com>
> ---

    Reviewed-by: Ferruh Yigit <ferruh.yigit@intel.com>
    Tested-by: Aws Ismail <aismail@ciena.com>
    Tested-by: Vasily Philipov <vasilyf@mellanox.com>

Applied to dpdk-next-net/master, thanks.

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [PATCH v12] net/tap: new TUN/TAP device PMD
  2016-12-13 13:54         ` Ferruh Yigit
@ 2017-01-20 12:14           ` Ferruh Yigit
  2017-01-20 14:25             ` [PATCH] net/tap: fix IFF_MULTI_QUEUE in older kernels not found Keith Wiles
  2017-01-20 14:30             ` Keith Wiles
  0 siblings, 2 replies; 59+ messages in thread
From: Ferruh Yigit @ 2017-01-20 12:14 UTC (permalink / raw)
  To: Keith Wiles, dev; +Cc: pmatilai, yuanhan.liu, john.mcnamara

On 12/13/2016 1:54 PM, Ferruh Yigit wrote:
> On 12/12/2016 2:38 PM, Keith Wiles wrote:
>> The PMD allows for DPDK and the host to communicate using a raw
>> device interface on the host and in the DPDK application. The device
>> created is a Tap device with a L2 packet header.
>>
>> v12- Fixup minor changes for driver_name and version number
>> v11- Add the tap.rst to the nic/index.rst file
>> v10- Change the string name used to allow for multiple devices.
>> v9 - Fix up the docs to use correct syntax
>> v8 - Fix issue with tap_tx_queue_setup() not return zero on success.
>> v7 - Reword the comment in common_base and fix the data->name issue
>> v6 - fixed the checkpatch issues
>> v5 - merge in changes from list review see related emails
>>      fixed many minor edits
>> v4 - merge with latest driver changes
>> v3 - fix includes by removing ifdef for other type besides Linux
>>      Fix the copyright notice in the Makefile
>> v2 - merge all of the patches into one patch
>>      Fix a typo on naming the tap device
>>      Update the maintainers list
>>
>> Signed-off-by: Keith Wiles <keith.wiles@intel.com>
>> ---
> 
>     Reviewed-by: Ferruh Yigit <ferruh.yigit@intel.com>
>     Tested-by: Aws Ismail <aismail@ciena.com>
>     Tested-by: Vasily Philipov <vasilyf@mellanox.com>
> 
> Applied to dpdk-next-net/master, thanks.
> 

Hi Keith,

The test report [1] shows a build error [2] for the PMD; I guess there is a
kernel version dependency, FYI.

The issue was caught after integration.


[1]
http://dpdk.org/ml/archives/test-report/2017-January/009616.html


[2]
DPDK/drivers/net/tap/rte_eth_tap.c: In function ‘tun_alloc’:
DPDK/drivers/net/tap/rte_eth_tap.c:143:19: error: ‘IFF_MULTI_QUEUE’
undeclared (first use in this function)
compilation terminated due to -Wfatal-errors.

^ permalink raw reply	[flat|nested] 59+ messages in thread

* [PATCH] net/tap: fix IFF_MULTI_QUEUE in older kernels not found
  2017-01-20 12:14           ` Ferruh Yigit
@ 2017-01-20 14:25             ` Keith Wiles
  2017-01-20 14:30             ` Keith Wiles
  1 sibling, 0 replies; 59+ messages in thread
From: Keith Wiles @ 2017-01-20 14:25 UTC (permalink / raw)
  To: dev; +Cc: ferruh.yigit

Signed-off-by: Keith Wiles <keith.wiles@intel.com>
---
 drivers/net/tap/rte_eth_tap.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/drivers/net/tap/rte_eth_tap.c b/drivers/net/tap/rte_eth_tap.c
index d5e2fc3..5cf9831 100644
--- a/drivers/net/tap/rte_eth_tap.c
+++ b/drivers/net/tap/rte_eth_tap.c
@@ -139,6 +139,7 @@ tun_alloc(char *name)
 	}
 	RTE_LOG(DEBUG, PMD, "TUN/TAP Features %08x\n", features);
 
+#ifdef IFF_MULTI_QUEUE
 	if (!(features & IFF_MULTI_QUEUE) && (RTE_PMD_TAP_MAX_QUEUES > 1)) {
 		RTE_LOG(DEBUG, PMD, "TUN/TAP device only one queue\n");
 		goto error;
@@ -151,6 +152,15 @@ tun_alloc(char *name)
 		RTE_LOG(DEBUG, PMD, "Multi-queue support for %d queues\n",
 			RTE_PMD_TAP_MAX_QUEUES);
 	}
+#else
+    if (RTE_PMD_TAP_MAX_QUEUES > 1) {
+        RTE_LOG(DEBUG, PMD, "TUN/TAP device only one queue\n");
+        goto error;
+    } else {
+        ifr.ifr_flags |= IFF_ONE_QUEUE;
+        RTE_LOG(DEBUG, PMD, "Single queue only support\n");
+    }
+#endif
 
 	/* Set the TUN/TAP configuration and get the name if needed */
 	if (ioctl(fd, TUNSETIFF, (void *)&ifr) < 0) {
-- 
2.8.0.GIT

^ permalink raw reply related	[flat|nested] 59+ messages in thread

* [PATCH] net/tap: fix IFF_MULTI_QUEUE in older kernels not found
  2017-01-20 12:14           ` Ferruh Yigit
  2017-01-20 14:25             ` [PATCH] net/tap: fix IFF_MULTI_QUEUE in older kernels not found Keith Wiles
@ 2017-01-20 14:30             ` Keith Wiles
  2017-01-20 16:11               ` Ferruh Yigit
  1 sibling, 1 reply; 59+ messages in thread
From: Keith Wiles @ 2017-01-20 14:30 UTC (permalink / raw)
  To: dev; +Cc: ferruh.yigit

Signed-off-by: Keith Wiles <keith.wiles@intel.com>
---
 drivers/net/tap/rte_eth_tap.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/drivers/net/tap/rte_eth_tap.c b/drivers/net/tap/rte_eth_tap.c
index d5e2fc3..cb96e99 100644
--- a/drivers/net/tap/rte_eth_tap.c
+++ b/drivers/net/tap/rte_eth_tap.c
@@ -139,6 +139,7 @@ tun_alloc(char *name)
 	}
 	RTE_LOG(DEBUG, PMD, "TUN/TAP Features %08x\n", features);
 
+#ifdef IFF_MULTI_QUEUE
 	if (!(features & IFF_MULTI_QUEUE) && (RTE_PMD_TAP_MAX_QUEUES > 1)) {
 		RTE_LOG(DEBUG, PMD, "TUN/TAP device only one queue\n");
 		goto error;
@@ -151,6 +152,15 @@ tun_alloc(char *name)
 		RTE_LOG(DEBUG, PMD, "Multi-queue support for %d queues\n",
 			RTE_PMD_TAP_MAX_QUEUES);
 	}
+#else
+	if (RTE_PMD_TAP_MAX_QUEUES > 1) {
+		RTE_LOG(DEBUG, PMD, "TUN/TAP device only one queue\n");
+		goto error;
+	} else {
+		ifr.ifr_flags |= IFF_ONE_QUEUE;
+		RTE_LOG(DEBUG, PMD, "Single queue only support\n");
+	}
+#endif
 
 	/* Set the TUN/TAP configuration and get the name if needed */
 	if (ioctl(fd, TUNSETIFF, (void *)&ifr) < 0) {
-- 
2.8.0.GIT

^ permalink raw reply related	[flat|nested] 59+ messages in thread

* Re: [PATCH] net/tap: fix IFF_MULTI_QUEUE in older kernels not found
  2017-01-20 14:30             ` Keith Wiles
@ 2017-01-20 16:11               ` Ferruh Yigit
  2017-01-20 17:37                 ` Thomas Monjalon
  0 siblings, 1 reply; 59+ messages in thread
From: Ferruh Yigit @ 2017-01-20 16:11 UTC (permalink / raw)
  To: Keith Wiles, dev, Thomas Monjalon

On 1/20/2017 2:30 PM, Keith Wiles wrote:
> Signed-off-by: Keith Wiles <keith.wiles@intel.com>

Reviewed-by: Ferruh Yigit <ferruh.yigit@intel.com>

Hi Thomas,

Although this is a driver patch, would you mind getting it to the main
tree to fix build error asap (instead of waiting next integration)?

Thanks,
ferruh

^ permalink raw reply	[flat|nested] 59+ messages in thread

* Re: [PATCH] net/tap: fix IFF_MULTI_QUEUE in older kernels not found
  2017-01-20 16:11               ` Ferruh Yigit
@ 2017-01-20 17:37                 ` Thomas Monjalon
  0 siblings, 0 replies; 59+ messages in thread
From: Thomas Monjalon @ 2017-01-20 17:37 UTC (permalink / raw)
  To: Ferruh Yigit, Keith Wiles; +Cc: dev

2017-01-20 16:11, Ferruh Yigit:
> On 1/20/2017 2:30 PM, Keith Wiles wrote:
> > Signed-off-by: Keith Wiles <keith.wiles@intel.com>
> 
> Reviewed-by: Ferruh Yigit <ferruh.yigit@intel.com>
> 
> Hi Thomas,
> 
> Although this is a driver patch, would you mind getting it into the main
> tree to fix the build error asap (instead of waiting for the next integration)?

Applied, thanks

^ permalink raw reply	[flat|nested] 59+ messages in thread

end of thread, other threads:[~2017-01-20 17:37 UTC | newest]

Thread overview: 59+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-09-15 14:10 [PATCH 1/3] drivers/net:new PMD using tun/tap host interface Keith Wiles
2016-09-15 14:10 ` [PATCH 2/3] docs:tun/tap PMD information Keith Wiles
2016-09-15 14:13   ` Wiles, Keith
2016-09-15 14:15     ` Wiles, Keith
2016-09-21  2:00   ` [PATCH v3] drivers/net:new PMD using tun/tap host interface Keith Wiles
2016-10-04 14:45     ` [PATCH v4] " Keith Wiles
2016-10-11  9:40       ` Ferruh Yigit
2016-10-11 11:30       ` Michał Mirosław
2016-10-11 20:56         ` Wiles, Keith
2016-10-12  8:14           ` Michał Mirosław
2016-10-11 11:49       ` Ferruh Yigit
2016-10-11 21:07         ` Wiles, Keith
2016-10-11 12:28       ` Ferruh Yigit
2016-10-11 20:57         ` Wiles, Keith
2016-10-11 21:51       ` [PATCH v5] " Keith Wiles
2016-10-12 14:56         ` Ferruh Yigit
2016-10-12 18:19           ` Wiles, Keith
2016-10-12 19:57             ` Wiles, Keith
2016-10-12 20:54     ` [PATCH v6] " Keith Wiles
2016-10-13 14:41       ` Ferruh Yigit
2016-10-13 15:36     ` [PATCH v7] " Keith Wiles
2016-10-13 16:11     ` [PATCH v8] " Keith Wiles
2016-10-13 16:33       ` Mcnamara, John
2016-10-13 22:03     ` [PATCH v9] " Keith Wiles
2016-10-14  6:41       ` Mcnamara, John
2016-10-14  9:39       ` Ferruh Yigit
2016-11-21 12:56       ` Ferruh Yigit
2016-11-25 19:38         ` Aws Ismail
2016-11-29 21:36           ` Aws Ismail
2016-11-29 22:16             ` Wiles, Keith
2016-12-07 19:38       ` [PATCH v10] drivers/net:new TUN/TAP device PMD Keith Wiles
2016-12-07 20:15         ` Aws Ismail
2016-12-09 18:16         ` Ferruh Yigit
2016-12-09 19:05       ` [PATCH v11] " Keith Wiles
2016-12-12 12:39         ` Vasily Philipov
2016-12-12 14:24       ` [PATCH v12] net/tap: new " Keith Wiles
2016-12-12 14:38       ` Keith Wiles
2016-12-12 19:13         ` Marc
2016-12-12 21:09           ` Wiles, Keith
2016-12-13 13:54         ` Ferruh Yigit
2017-01-20 12:14           ` Ferruh Yigit
2017-01-20 14:25             ` [PATCH] net/tap: fix IFF_MULTI_QUEUE in older kernels not found Keith Wiles
2017-01-20 14:30             ` Keith Wiles
2017-01-20 16:11               ` Ferruh Yigit
2017-01-20 17:37                 ` Thomas Monjalon
2016-09-15 14:10 ` [PATCH 3/3] drivers/net:build support for new tap device driver Keith Wiles
2016-09-16  7:36   ` Panu Matilainen
2016-09-16 14:46     ` Wiles, Keith
2016-09-16 16:22 ` [PATCH v2] drivers/net:new PMD using tun/tap host interface Keith Wiles
2016-09-18 13:25   ` Yuanhan Liu
2016-09-18 16:20     ` Wiles, Keith
2016-09-19  0:29       ` Yuanhan Liu
2016-09-19 15:56         ` Wiles, Keith
2016-09-20  3:54           ` Yuanhan Liu
2016-09-20  4:05   ` Yuanhan Liu
2016-09-21  1:32     ` Wiles, Keith
2016-09-21  2:13       ` Yuanhan Liu
2016-09-21  8:24         ` Thomas Monjalon
2016-09-21 23:55           ` Wiles, Keith

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.