From mboxrd@z Thu Jan 1 00:00:00 1970 From: Stephen Hemminger Subject: Re: [PATCH 3/3] vhost: Add VHOST PMD Date: Mon, 9 Nov 2015 14:25:05 -0800 Message-ID: <20151109142505.2bdf56e2@xeon-e3> References: <1445507150-1481-2-git-send-email-mukawa@igel.co.jp> <1445926375-18986-1-git-send-email-mukawa@igel.co.jp> <1445926375-18986-4-git-send-email-mukawa@igel.co.jp> Mime-Version: 1.0 Content-Type: text/plain; charset=US-ASCII Content-Transfer-Encoding: 7bit Cc: dev@dpdk.org, ann.zhuangyanying@huawei.com To: Tetsuya Mukawa Return-path: Received: from mail-pa0-f51.google.com (mail-pa0-f51.google.com [209.85.220.51]) by dpdk.org (Postfix) with ESMTP id F1FC6FE5 for ; Mon, 9 Nov 2015 23:24:56 +0100 (CET) Received: by pabfh17 with SMTP id fh17so212354626pab.0 for ; Mon, 09 Nov 2015 14:24:56 -0800 (PST) In-Reply-To: <1445926375-18986-4-git-send-email-mukawa@igel.co.jp> List-Id: patches and discussions about DPDK List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" On Tue, 27 Oct 2015 15:12:55 +0900 Tetsuya Mukawa wrote: > The patch introduces a new PMD. This PMD is implemented as a thin wrapper > of librte_vhost. It means librte_vhost is also needed to compile the PMD. > The vhost messages will be handled only when a port is started. So start > a port first, then invoke QEMU. > > The PMD has 2 parameters. > - iface: The parameter is used to specify a path to connect to a > virtio-net device. > - queues: The parameter is used to specify the number of queues the > virtio-net device has. > (Default: 1) > > Here is an example. > $ ./testpmd -c f -n 4 --vdev 'eth_vhost0,iface=/tmp/sock0,queues=1' -- -i > > To connect to the above testpmd, here is an example QEMU command. 
> > $ qemu-system-x86_64 \ > > -chardev socket,id=chr0,path=/tmp/sock0 \ > -netdev vhost-user,id=net0,chardev=chr0,vhostforce,queues=1 \ > -device virtio-net-pci,netdev=net0 > > Signed-off-by: Tetsuya Mukawa Brocade developed a much simpler vhost PMD, without all the atomics and locking. /*- * BSD LICENSE * * Copyright (C) Brocade Communications Systems, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * * Neither the name of Brocade Communications Systems, Inc. * nor the names of its contributors may be used to endorse * or promote products derived from this software without specific * prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include #include #include #include #include #include #include #include "../librte_vhost/rte_virtio_net.h" #include "../librte_vhost/virtio-net.h" struct pmd_internals; struct vhost_queue { struct pmd_internals *internals; struct rte_mempool *mb_pool; uint64_t pkts; uint64_t bytes; }; struct pmd_internals { struct virtio_net *dev; unsigned numa_node; struct eth_driver *eth_drv; unsigned nb_rx_queues; unsigned nb_tx_queues; struct vhost_queue rx_queues[1]; struct vhost_queue tx_queues[1]; uint8_t port_id; }; static const char *drivername = "Vhost PMD"; static struct rte_eth_link pmd_link = { .link_speed = 10000, .link_duplex = ETH_LINK_FULL_DUPLEX, .link_status = 0 }; static uint16_t eth_vhost_rx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs) { int ret, i; struct vhost_queue *h = q; ret = rte_vhost_dequeue_burst(h->internals->dev, VIRTIO_TXQ, h->mb_pool, bufs, nb_bufs); for (i = 0; i < ret ; i++) { struct rte_mbuf *m = bufs[i]; m->port = h->internals->port_id; ++h->pkts; h->bytes += rte_pktmbuf_pkt_len(m); } return ret; } static uint16_t eth_vhost_tx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs) { int ret, i; struct vhost_queue *h = q; ret = rte_vhost_enqueue_burst(h->internals->dev, VIRTIO_RXQ, bufs, nb_bufs); for (i = 0; i < ret; i++) { struct rte_mbuf *m = bufs[i]; ++h->pkts; h->bytes += rte_pktmbuf_pkt_len(m); rte_pktmbuf_free(m); } return ret; } static int eth_dev_configure(struct rte_eth_dev *dev __rte_unused) { return 0; } static int eth_dev_start(struct rte_eth_dev *dev) { struct pmd_internals *internals = dev->data->dev_private; dev->data->dev_link.link_status = 1; RTE_LOG(INFO, PMD, "vhost(%s): link up\n", internals->dev->ifname); return 0; } static void eth_dev_stop(struct rte_eth_dev *dev) { struct pmd_internals *internals = dev->data->dev_private; dev->data->dev_link.link_status = 0; RTE_LOG(INFO, PMD, "vhost(%s): link down\n", internals->dev->ifname); } static int eth_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id, uint16_t 
nb_rx_desc __rte_unused, unsigned int socket_id __rte_unused, const struct rte_eth_rxconf *rx_conf __rte_unused, struct rte_mempool *mb_pool) { struct pmd_internals *internals = dev->data->dev_private; internals->rx_queues[rx_queue_id].mb_pool = mb_pool; dev->data->rx_queues[rx_queue_id] = &internals->rx_queues[rx_queue_id]; internals->rx_queues[rx_queue_id].internals = internals; return 0; } static int eth_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id, uint16_t nb_tx_desc __rte_unused, unsigned int socket_id __rte_unused, const struct rte_eth_txconf *tx_conf __rte_unused) { struct pmd_internals *internals = dev->data->dev_private; dev->data->tx_queues[tx_queue_id] = &internals->tx_queues[tx_queue_id]; internals->tx_queues[tx_queue_id].internals = internals; return 0; } static void eth_dev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info) { struct pmd_internals *internals = dev->data->dev_private; dev_info->driver_name = drivername; dev_info->max_mac_addrs = 1; dev_info->max_rx_pktlen = -1; dev_info->max_rx_queues = (uint16_t)internals->nb_rx_queues; dev_info->max_tx_queues = (uint16_t)internals->nb_tx_queues; dev_info->min_rx_bufsize = 0; dev_info->pci_dev = NULL; } static void eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats) { const struct pmd_internals *internal = dev->data->dev_private; unsigned i; for (i = 0; i < internal->nb_rx_queues; i++) { const struct vhost_queue *h = &internal->rx_queues[i]; stats->ipackets += h->pkts; stats->ibytes += h->bytes; if (i < RTE_ETHDEV_QUEUE_STAT_CNTRS) { stats->q_ibytes[i] = h->bytes; stats->q_ipackets[i] = h->pkts; } } for (i = 0; i < internal->nb_tx_queues; i++) { const struct vhost_queue *h = &internal->tx_queues[i]; stats->opackets += h->pkts; stats->obytes += h->bytes; if (i < RTE_ETHDEV_QUEUE_STAT_CNTRS) { stats->q_obytes[i] = h->bytes; stats->q_opackets[i] = h->pkts; } } } static void eth_stats_reset(struct rte_eth_dev *dev) { unsigned i; struct pmd_internals *internal 
= dev->data->dev_private; for (i = 0; i < internal->nb_rx_queues; i++) { internal->rx_queues[i].pkts = 0; internal->rx_queues[i].bytes = 0; } for (i = 0; i < internal->nb_tx_queues; i++) { internal->tx_queues[i].pkts = 0; internal->tx_queues[i].bytes = 0; } } static struct eth_driver rte_vhost_pmd = { .pci_drv = { .name = "rte_vhost_pmd", .drv_flags = RTE_PCI_DRV_DETACHABLE, }, }; static void eth_queue_release(void *q __rte_unused) { } static int eth_link_update(struct rte_eth_dev *dev __rte_unused, int wait_to_complete __rte_unused) { return 0; } static struct eth_dev_ops eth_ops = { .dev_start = eth_dev_start, .dev_stop = eth_dev_stop, .dev_configure = eth_dev_configure, .dev_infos_get = eth_dev_info, .rx_queue_setup = eth_rx_queue_setup, .tx_queue_setup = eth_tx_queue_setup, .rx_queue_release = eth_queue_release, .tx_queue_release = eth_queue_release, .link_update = eth_link_update, .stats_get = eth_stats_get, .stats_reset = eth_stats_reset, }; static int eth_dev_vhost_create(const char *name, const unsigned numa_node) { const unsigned nb_rx_queues = 1; const unsigned nb_tx_queues = 1; struct rte_eth_dev_data *data = NULL; struct rte_pci_device *pci_dev = NULL; struct pmd_internals *internals = NULL; struct rte_eth_dev *eth_dev = NULL; struct virtio_net *vhost_dev = NULL; struct eth_driver *eth_drv = NULL; struct rte_pci_id *id_table = NULL; struct ether_addr *eth_addr = NULL; if (name == NULL) return -EINVAL; vhost_dev = get_device_by_name(name); if (vhost_dev == NULL) return -EINVAL; RTE_LOG(INFO, PMD, "Creating vhost ethdev on numa socket %u\n", numa_node); data = rte_zmalloc_socket(name, sizeof(*data), 0, numa_node); if (data == NULL) goto error; pci_dev = rte_zmalloc_socket(name, sizeof(*pci_dev), 0, numa_node); if (pci_dev == NULL) goto error; id_table = rte_zmalloc_socket(name, sizeof(*id_table), 0, numa_node); if (id_table == NULL) goto error; internals = rte_zmalloc_socket(name, sizeof(*internals), 0, numa_node); if (internals == NULL) goto error; 
eth_addr = rte_zmalloc_socket(name, sizeof(*eth_addr), 0, numa_node); if (internals == NULL) goto error; eth_dev = rte_eth_dev_allocate(name, RTE_ETH_DEV_VIRTUAL); if (eth_dev == NULL) goto error; eth_drv = rte_zmalloc_socket(name, sizeof(*eth_drv), 0, numa_node); if (eth_drv == NULL) goto error; internals->nb_rx_queues = nb_rx_queues; internals->nb_tx_queues = nb_tx_queues; internals->numa_node = numa_node; internals->dev = vhost_dev; internals->port_id = eth_dev->data->port_id; eth_drv->pci_drv.name = drivername; eth_drv->pci_drv.id_table = id_table; internals->eth_drv = eth_drv; pci_dev->numa_node = numa_node; pci_dev->driver = ð_drv->pci_drv; data->dev_private = internals; data->port_id = eth_dev->data->port_id; data->nb_rx_queues = (uint16_t)nb_rx_queues; data->nb_tx_queues = (uint16_t)nb_tx_queues; data->dev_link = pmd_link; eth_random_addr(ð_addr->addr_bytes[0]); data->mac_addrs = eth_addr; strncpy(data->name, eth_dev->data->name, strlen(eth_dev->data->name)); eth_dev->data = data; eth_dev->dev_ops = ð_ops; eth_dev->pci_dev = pci_dev; eth_dev->driver = &rte_vhost_pmd; eth_dev->rx_pkt_burst = eth_vhost_rx; eth_dev->tx_pkt_burst = eth_vhost_tx; TAILQ_INIT(&(eth_dev->link_intr_cbs)); return 0; error: rte_free(data); rte_free(pci_dev); rte_free(id_table); rte_free(eth_drv); rte_free(eth_addr); rte_free(internals); return -1; } static int rte_pmd_vhost_devinit(const char *name, const char *params __attribute__((unused))) { unsigned numa_node; if (name == NULL) return -EINVAL; RTE_LOG(DEBUG, PMD, "Initializing pmd_vhost for %s\n", name); numa_node = rte_socket_id(); return eth_dev_vhost_create(name, numa_node); } static int rte_pmd_vhost_devuninit(const char *name) { struct rte_eth_dev *eth_dev = NULL; struct pmd_internals *internals = NULL; if (name == NULL) return -EINVAL; RTE_LOG(DEBUG, PMD, "Closing vhost ethdev on numa socket %u\n", rte_socket_id()); /* reserve an ethdev entry */ eth_dev = rte_eth_dev_allocated(name); if (eth_dev == NULL) return -1; internals 
= (struct pmd_internals *)eth_dev->data->dev_private; rte_free(internals->eth_drv->pci_drv.id_table); rte_free(internals->eth_drv); rte_free(eth_dev->data->dev_private); rte_free(eth_dev->data->mac_addrs); rte_free(eth_dev->data); rte_free(eth_dev->pci_dev); rte_eth_dev_release_port(eth_dev); return 0; } static struct rte_driver pmd_vhost_drv = { .name = "vhost", .type = PMD_VDEV, .init = rte_pmd_vhost_devinit, .uninit = rte_pmd_vhost_devuninit, }; PMD_REGISTER_DRIVER(pmd_vhost_drv);