From mboxrd@z Thu Jan 1 00:00:00 1970
From: Tetsuya Mukawa
Subject: Re: [PATCH 3/3] vhost: Add VHOST PMD
Date: Tue, 10 Nov 2015 12:27:28 +0900
Message-ID: <56416420.90601@igel.co.jp>
References: <1445507150-1481-2-git-send-email-mukawa@igel.co.jp>
 <1445926375-18986-1-git-send-email-mukawa@igel.co.jp>
 <1445926375-18986-4-git-send-email-mukawa@igel.co.jp>
 <20151109142505.2bdf56e2@xeon-e3>
Mime-Version: 1.0
Content-Type: text/plain; charset=windows-1252
Content-Transfer-Encoding: 7bit
Cc: dev@dpdk.org, ann.zhuangyanying@huawei.com
To: Stephen Hemminger
In-Reply-To: <20151109142505.2bdf56e2@xeon-e3>
List-Id: patches and discussions about DPDK

On 2015/11/10 7:25, Stephen Hemminger wrote:
> On Tue, 27 Oct 2015 15:12:55 +0900
> Tetsuya Mukawa wrote:
>
>> The patch introduces a new PMD. This PMD is implemented as a thin wrapper
>> of librte_vhost, so librte_vhost is also needed to compile the PMD.
>> The vhost messages are handled only while a port is started, so start
>> the port first, then invoke QEMU.
>>
>> The PMD has 2 parameters.
>>  - iface: The parameter is used to specify a path to connect to a
>>           virtio-net device.
>>  - queues: The parameter is used to specify the number of queues the
>>            virtio-net device has. (Default: 1)
>>
>> Here is an example.
>> $ ./testpmd -c f -n 4 --vdev 'eth_vhost0,iface=/tmp/sock0,queues=1' -- -i
>>
>> To connect to the above testpmd, here is a QEMU command example.
>>
>> $ qemu-system-x86_64 \
>>     -chardev socket,id=chr0,path=/tmp/sock0 \
>>     -netdev vhost-user,id=net0,chardev=chr0,vhostforce,queues=1 \
>>     -device virtio-net-pci,netdev=net0
>>
>> Signed-off-by: Tetsuya Mukawa
> Brocade developed a much simpler vhost PMD, without all the atomics and
> locking.
>

Hi Stephen,

With your PMD, it seems the application still needs to call some vhost
library APIs before it can start sending and receiving, which means the
DPDK application has to manage the virtio-net device connections itself
anyway. Also, I think any PMD should be replaceable by another PMD without
heavy modification of the DPDK application. That is why I chose to manage
the virtio-net device connections inside the vhost PMD. (A sketch of the
application-side wiring the thinner approach would need is appended after
the quoted code below.)

Thanks,
Tetsuya

> /*-
>  * BSD LICENSE
>  *
>  * Copyright (C) Brocade Communications Systems, Inc.
>  * All rights reserved.
>  *
>  * Redistribution and use in source and binary forms, with or without
>  * modification, are permitted provided that the following conditions
>  * are met:
>  *
>  *   * Redistributions of source code must retain the above copyright
>  *     notice, this list of conditions and the following disclaimer.
>  *   * Redistributions in binary form must reproduce the above copyright
>  *     notice, this list of conditions and the following disclaimer in
>  *     the documentation and/or other materials provided with the
>  *     distribution.
>  *   * Neither the name of Brocade Communications Systems, Inc.
>  *     nor the names of its contributors may be used to endorse
>  *     or promote products derived from this software without specific
>  *     prior written permission.
>  *
>  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
>  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
>  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
>  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
>  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
>  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
>  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
>  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
>  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
>  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
>  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
>  */
>
> #include
> #include
> #include
> #include
> #include
> #include
> #include
>
> #include "../librte_vhost/rte_virtio_net.h"
> #include "../librte_vhost/virtio-net.h"
>
> struct pmd_internals;
>
> struct vhost_queue {
> 	struct pmd_internals *internals;
>
> 	struct rte_mempool *mb_pool;
>
> 	uint64_t pkts;
> 	uint64_t bytes;
> };
>
> struct pmd_internals {
> 	struct virtio_net *dev;
> 	unsigned numa_node;
> 	struct eth_driver *eth_drv;
>
> 	unsigned nb_rx_queues;
> 	unsigned nb_tx_queues;
>
> 	struct vhost_queue rx_queues[1];
> 	struct vhost_queue tx_queues[1];
> 	uint8_t port_id;
> };
>
>
> static const char *drivername = "Vhost PMD";
>
> static struct rte_eth_link pmd_link = {
> 	.link_speed = 10000,
> 	.link_duplex = ETH_LINK_FULL_DUPLEX,
> 	.link_status = 0
> };
>
> static uint16_t
> eth_vhost_rx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
> {
> 	int ret, i;
> 	struct vhost_queue *h = q;
>
> 	ret = rte_vhost_dequeue_burst(h->internals->dev,
> 			VIRTIO_TXQ, h->mb_pool, bufs, nb_bufs);
>
> 	for (i = 0; i < ret ; i++) {
> 		struct rte_mbuf *m = bufs[i];
>
> 		m->port = h->internals->port_id;
> 		++h->pkts;
> 		h->bytes += rte_pktmbuf_pkt_len(m);
> 	}
> 	return ret;
> }
>
> static uint16_t
> eth_vhost_tx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
> {
> 	int ret, i;
> 	struct vhost_queue *h = q;
>
> 	ret = rte_vhost_enqueue_burst(h->internals->dev,
> 			VIRTIO_RXQ, bufs, nb_bufs);
>
> 	for (i = 0; i < ret; i++) {
> 		struct rte_mbuf *m = bufs[i];
>
> 		++h->pkts;
> 		h->bytes += rte_pktmbuf_pkt_len(m);
> 		rte_pktmbuf_free(m);
> 	}
>
> 	return ret;
> }
>
> static int
> eth_dev_configure(struct rte_eth_dev *dev __rte_unused)
> {
> 	return 0;
> }
>
> static int
> eth_dev_start(struct rte_eth_dev *dev)
> {
> 	struct pmd_internals *internals = dev->data->dev_private;
>
> 	dev->data->dev_link.link_status = 1;
> 	RTE_LOG(INFO, PMD, "vhost(%s): link up\n", internals->dev->ifname);
> 	return 0;
> }
>
> static void
> eth_dev_stop(struct rte_eth_dev *dev)
> {
> 	struct pmd_internals *internals = dev->data->dev_private;
>
> 	dev->data->dev_link.link_status = 0;
> 	RTE_LOG(INFO, PMD, "vhost(%s): link down\n", internals->dev->ifname);
> }
>
> static int
> eth_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
> 		   uint16_t nb_rx_desc __rte_unused,
> 		   unsigned int socket_id __rte_unused,
> 		   const struct rte_eth_rxconf *rx_conf __rte_unused,
> 		   struct rte_mempool *mb_pool)
> {
> 	struct pmd_internals *internals = dev->data->dev_private;
>
> 	internals->rx_queues[rx_queue_id].mb_pool = mb_pool;
> 	dev->data->rx_queues[rx_queue_id] =
> 		&internals->rx_queues[rx_queue_id];
> 	internals->rx_queues[rx_queue_id].internals = internals;
>
> 	return 0;
> }
>
> static int
> eth_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
> 		   uint16_t nb_tx_desc
> 		   __rte_unused,
> 		   unsigned int socket_id __rte_unused,
> 		   const struct rte_eth_txconf *tx_conf __rte_unused)
> {
> 	struct pmd_internals *internals = dev->data->dev_private;
>
> 	dev->data->tx_queues[tx_queue_id] =
> 		&internals->tx_queues[tx_queue_id];
> 	internals->tx_queues[tx_queue_id].internals = internals;
>
> 	return 0;
> }
>
>
> static void
> eth_dev_info(struct rte_eth_dev *dev,
> 	     struct rte_eth_dev_info *dev_info)
> {
> 	struct pmd_internals *internals = dev->data->dev_private;
>
> 	dev_info->driver_name = drivername;
> 	dev_info->max_mac_addrs = 1;
> 	dev_info->max_rx_pktlen = -1;
> 	dev_info->max_rx_queues = (uint16_t)internals->nb_rx_queues;
> 	dev_info->max_tx_queues = (uint16_t)internals->nb_tx_queues;
> 	dev_info->min_rx_bufsize = 0;
> 	dev_info->pci_dev = NULL;
> }
>
> static void
> eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
> {
> 	const struct pmd_internals *internal = dev->data->dev_private;
> 	unsigned i;
>
> 	for (i = 0; i < internal->nb_rx_queues; i++) {
> 		const struct vhost_queue *h = &internal->rx_queues[i];
>
> 		stats->ipackets += h->pkts;
> 		stats->ibytes += h->bytes;
>
> 		if (i < RTE_ETHDEV_QUEUE_STAT_CNTRS) {
> 			stats->q_ibytes[i] = h->bytes;
> 			stats->q_ipackets[i] = h->pkts;
> 		}
> 	}
>
> 	for (i = 0; i < internal->nb_tx_queues; i++) {
> 		const struct vhost_queue *h = &internal->tx_queues[i];
>
> 		stats->opackets += h->pkts;
> 		stats->obytes += h->bytes;
>
> 		if (i < RTE_ETHDEV_QUEUE_STAT_CNTRS) {
> 			stats->q_obytes[i] = h->bytes;
> 			stats->q_opackets[i] = h->pkts;
> 		}
> 	}
> }
>
> static void
> eth_stats_reset(struct rte_eth_dev *dev)
> {
> 	unsigned i;
> 	struct pmd_internals *internal = dev->data->dev_private;
>
> 	for (i = 0; i < internal->nb_rx_queues; i++) {
> 		internal->rx_queues[i].pkts = 0;
> 		internal->rx_queues[i].bytes = 0;
> 	}
>
> 	for (i = 0; i < internal->nb_tx_queues; i++) {
> 		internal->tx_queues[i].pkts = 0;
> 		internal->tx_queues[i].bytes = 0;
> 	}
> }
>
> static struct eth_driver rte_vhost_pmd = {
> 	.pci_drv = {
> 		.name = "rte_vhost_pmd",
> 		.drv_flags = RTE_PCI_DRV_DETACHABLE,
> 	},
> };
>
> static void
> eth_queue_release(void *q __rte_unused)
> {
> }
>
> static int
> eth_link_update(struct rte_eth_dev *dev __rte_unused,
> 		int wait_to_complete __rte_unused)
> {
> 	return 0;
> }
>
> static struct eth_dev_ops eth_ops = {
> 	.dev_start = eth_dev_start,
> 	.dev_stop = eth_dev_stop,
> 	.dev_configure = eth_dev_configure,
> 	.dev_infos_get = eth_dev_info,
> 	.rx_queue_setup = eth_rx_queue_setup,
> 	.tx_queue_setup = eth_tx_queue_setup,
> 	.rx_queue_release = eth_queue_release,
> 	.tx_queue_release = eth_queue_release,
> 	.link_update = eth_link_update,
> 	.stats_get = eth_stats_get,
> 	.stats_reset = eth_stats_reset,
> };
>
> static int
> eth_dev_vhost_create(const char *name, const unsigned numa_node)
> {
> 	const unsigned nb_rx_queues = 1;
> 	const unsigned nb_tx_queues = 1;
> 	struct rte_eth_dev_data *data = NULL;
> 	struct rte_pci_device *pci_dev = NULL;
> 	struct pmd_internals *internals = NULL;
> 	struct rte_eth_dev *eth_dev = NULL;
> 	struct virtio_net *vhost_dev = NULL;
> 	struct eth_driver *eth_drv = NULL;
> 	struct rte_pci_id *id_table = NULL;
> 	struct ether_addr *eth_addr = NULL;
>
> 	if (name == NULL)
> 		return -EINVAL;
>
> 	vhost_dev = get_device_by_name(name);
>
> 	if (vhost_dev == NULL)
> 		return -EINVAL;
>
> 	RTE_LOG(INFO, PMD, "Creating vhost ethdev on numa socket %u\n",
> 		numa_node);
>
> 	data = rte_zmalloc_socket(name, sizeof(*data), 0, numa_node);
> 	if (data == NULL)
> 		goto error;
>
> 	pci_dev = rte_zmalloc_socket(name, sizeof(*pci_dev), 0, numa_node);
> 	if (pci_dev == NULL)
> 		goto error;
>
> 	id_table = rte_zmalloc_socket(name, sizeof(*id_table), 0, numa_node);
> 	if (id_table == NULL)
> 		goto error;
>
> 	internals = rte_zmalloc_socket(name, sizeof(*internals), 0, numa_node);
> 	if (internals == NULL)
> 		goto error;
>
> 	eth_addr = rte_zmalloc_socket(name, sizeof(*eth_addr), 0, numa_node);
> 	if (eth_addr == NULL)
> 		goto error;
>
> 	eth_dev = rte_eth_dev_allocate(name, RTE_ETH_DEV_VIRTUAL);
> 	if (eth_dev == NULL)
> 		goto error;
>
> 	eth_drv = rte_zmalloc_socket(name, sizeof(*eth_drv), 0, numa_node);
> 	if (eth_drv == NULL)
> 		goto error;
>
> 	internals->nb_rx_queues = nb_rx_queues;
> 	internals->nb_tx_queues = nb_tx_queues;
> 	internals->numa_node = numa_node;
> 	internals->dev = vhost_dev;
>
> 	internals->port_id = eth_dev->data->port_id;
>
> 	eth_drv->pci_drv.name = drivername;
> 	eth_drv->pci_drv.id_table = id_table;
> 	internals->eth_drv = eth_drv;
>
> 	pci_dev->numa_node = numa_node;
> 	pci_dev->driver = &eth_drv->pci_drv;
>
> 	data->dev_private = internals;
> 	data->port_id = eth_dev->data->port_id;
> 	data->nb_rx_queues = (uint16_t)nb_rx_queues;
> 	data->nb_tx_queues = (uint16_t)nb_tx_queues;
> 	data->dev_link = pmd_link;
> 	eth_random_addr(&eth_addr->addr_bytes[0]);
> 	data->mac_addrs = eth_addr;
> 	strncpy(data->name, eth_dev->data->name, strlen(eth_dev->data->name));
>
> 	eth_dev->data = data;
> 	eth_dev->dev_ops = &eth_ops;
> 	eth_dev->pci_dev = pci_dev;
> 	eth_dev->driver = &rte_vhost_pmd;
> 	eth_dev->rx_pkt_burst = eth_vhost_rx;
> 	eth_dev->tx_pkt_burst = eth_vhost_tx;
> 	TAILQ_INIT(&(eth_dev->link_intr_cbs));
>
> 	return 0;
>
> error:
> 	rte_free(data);
> 	rte_free(pci_dev);
> 	rte_free(id_table);
> 	rte_free(eth_drv);
> 	rte_free(eth_addr);
> 	rte_free(internals);
>
> 	return -1;
> }
>
> static int
> rte_pmd_vhost_devinit(const char *name,
> 		      const char *params __attribute__((unused)))
> {
> 	unsigned numa_node;
>
> 	if (name == NULL)
> 		return -EINVAL;
>
> 	RTE_LOG(DEBUG, PMD, "Initializing pmd_vhost for %s\n", name);
>
> 	numa_node = rte_socket_id();
>
> 	return eth_dev_vhost_create(name, numa_node);
> }
>
> static int
> rte_pmd_vhost_devuninit(const char *name)
> {
> 	struct rte_eth_dev *eth_dev = NULL;
> 	struct pmd_internals *internals = NULL;
>
> 	if (name == NULL)
> 		return -EINVAL;
>
> 	RTE_LOG(DEBUG, PMD, "Closing vhost ethdev on numa socket %u\n",
> 		rte_socket_id());
>
> 	/* reserve an ethdev entry */
> 	eth_dev = rte_eth_dev_allocated(name);
> 	if (eth_dev == NULL)
> 		return -1;
>
> 	internals = (struct pmd_internals *)eth_dev->data->dev_private;
> 	rte_free(internals->eth_drv->pci_drv.id_table);
> 	rte_free(internals->eth_drv);
> 	rte_free(eth_dev->data->dev_private);
> 	rte_free(eth_dev->data->mac_addrs);
> 	rte_free(eth_dev->data);
> 	rte_free(eth_dev->pci_dev);
>
> 	rte_eth_dev_release_port(eth_dev);
>
> 	return 0;
> }
>
> static struct rte_driver pmd_vhost_drv = {
> 	.name = "vhost",
> 	.type = PMD_VDEV,
> 	.init = rte_pmd_vhost_devinit,
> 	.uninit = rte_pmd_vhost_devuninit,
> };
>
> PMD_REGISTER_DRIVER(pmd_vhost_drv);
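
For reference, this is roughly the application-side wiring a thin vhost PMD
without built-in connection management would push onto the application. It is
only a minimal sketch against the current librte_vhost API
(rte_vhost_driver_register(), rte_vhost_driver_callback_register() and
rte_vhost_driver_session_start()); the callback bodies, app_vhost_setup() and
the logging are illustrative names, not code from either patch.

#include <pthread.h>

#include <rte_log.h>
#include <rte_virtio_net.h>

/* Called by librte_vhost when a vhost-user connection comes up;
 * the application has to remember which virtio_net devices are usable. */
static int
new_device(struct virtio_net *dev)
{
	dev->flags |= VIRTIO_DEV_RUNNING;
	RTE_LOG(INFO, USER1, "vhost device %s is ready\n", dev->ifname);
	return 0;
}

/* Called when QEMU disconnects; stop using the device. */
static void
destroy_device(volatile struct virtio_net *dev)
{
	dev->flags &= ~VIRTIO_DEV_RUNNING;
}

static const struct virtio_net_device_ops vhost_ops = {
	.new_device     = new_device,
	.destroy_device = destroy_device,
};

/* rte_vhost_driver_session_start() blocks, so give it its own thread. */
static void *
vhost_session(void *arg)
{
	(void)arg;
	rte_vhost_driver_session_start();
	return NULL;
}

/* Register the unix socket and start the vhost-user message loop. */
static int
app_vhost_setup(const char *sock_path)
{
	pthread_t tid;

	if (rte_vhost_driver_register(sock_path) < 0)
		return -1;
	if (rte_vhost_driver_callback_register(&vhost_ops) < 0)
		return -1;
	return pthread_create(&tid, NULL, vhost_session, NULL);
}

This is the part my patch tries to keep inside the PMD, done when the port is
started, so the application only sees a normal ethdev.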