From: Jianfeng Tan
Subject: [PATCH v2 12/12] net/vhost: support to run in the secondary process
Date: Thu, 28 Sep 2017 13:55:59 +0000
Message-ID: <1506606959-76230-13-git-send-email-jianfeng.tan@intel.com>
In-Reply-To: <1506606959-76230-1-git-send-email-jianfeng.tan@intel.com>
References: <1503654052-84730-1-git-send-email-jianfeng.tan@intel.com>
 <1506606959-76230-1-git-send-email-jianfeng.tan@intel.com>
To: dev@dpdk.org
Cc: bruce.richardson@intel.com, konstantin.ananyev@intel.com,
 pablo.de.lara.guarch@intel.com, thomas@monjalon.net, yliu@fridaylinux.org,
 maxime.coquelin@redhat.com, mtetsuyah@gmail.com, ferruh.yigit@intel.com,
 Jianfeng Tan

Support running the vhost-pmd vdev in a secondary process.

The secondary process obtains information such as the memory regions,
kickfd and callfd through the primary/secondary communication channel.
By invoking rte_vhost_set_vring_effective_fd, it then installs kick/call
fds that are valid in the secondary process.

Signed-off-by: Jianfeng Tan
---
 drivers/net/vhost/rte_eth_vhost.c | 200 +++++++++++++++++++++++++++++++++++---
 1 file changed, 187 insertions(+), 13 deletions(-)

diff --git a/drivers/net/vhost/rte_eth_vhost.c b/drivers/net/vhost/rte_eth_vhost.c
index 0dac5e6..6a685a3 100644
--- a/drivers/net/vhost/rte_eth_vhost.c
+++ b/drivers/net/vhost/rte_eth_vhost.c
@@ -33,6 +33,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
@@ -46,6 +47,20 @@
 
 #include "rte_eth_vhost.h"
 
+#define VHOST_MSG_TYPE_REGIONS 1
+#define VHOST_MSG_TYPE_SET_FDS 2
+#define VHOST_MSG_TYPE_INIT 3
+
+struct vhost_params {
+	int type;
+	union {
+		int vid;
+		int portid;
+	};
+	int vring_idx;
+	struct rte_vhost_mem_region regions[0];
+};
+
 enum {VIRTIO_RXQ, VIRTIO_TXQ, VIRTIO_QNUM};
 
 #define ETH_VHOST_IFACE_ARG	"iface"
@@ -550,6 +565,66 @@ update_queuing_status(struct rte_eth_dev *dev)
 }
 
 static int
+share_device(int vid)
+{
+	uint32_t i, vring_num;
+	int len;
+	int fds[8];
+	struct rte_vhost_memory *mem;
+	struct vhost_params *params;
+	struct rte_vhost_vring vring;
+
+	/* share mem table */
+	if (rte_vhost_get_mem_table(vid, &mem) < 0) {
+		RTE_LOG(ERR, PMD, "Failed to get mem table\n");
+		return 0;
+	}
+	for (i = 0; i < mem->nregions; ++i)
+		fds[i] = mem->regions[i].fd;
+
+	len = sizeof(struct rte_vhost_mem_region) * mem->nregions;
+	params = malloc(sizeof(*params) + len);
+	if (params == NULL) {
+		RTE_LOG(ERR, PMD, "Failed to allocate memory\n");
+		return -1;
+	}
+
+	params->type = VHOST_MSG_TYPE_REGIONS;
+	params->vid = vid;
+	memcpy(params->regions, mem->regions, len);
+
+	if (rte_eal_mp_sendmsg("vhost pmd", params, sizeof(*params) + len,
+			fds, mem->nregions) < 0) {
+		RTE_LOG(ERR, PMD, "Failed to share mem table\n");
+		free(params);
+		return -1;
+	}
+
+	/* share callfd and kickfd */
+	params->type = VHOST_MSG_TYPE_SET_FDS;
+	vring_num = rte_vhost_get_vring_num(vid);
+	for (i = 0; i < vring_num; i++) {
+		if (rte_vhost_get_vhost_vring(vid, i, &vring) < 0) {
+			RTE_LOG(ERR, PMD, "Failed to get vring, idx = %d\n", i);
+			free(params);
+			return -1;
+		}
+
+		params->vring_idx = i;
+		fds[0] = vring.callfd;
+		fds[1] = vring.kickfd;
+		if (rte_eal_mp_sendmsg("vhost pmd", params,
+				sizeof(*params), fds, 2) < 0) {
+			RTE_LOG(ERR, PMD, "Failed to set fds\n");
+			return -1;
+		}
+	}
+
+	free(params);
+	return 0;
+}
+
+static int
 new_device(int vid)
 {
 	struct rte_eth_dev *eth_dev;
@@ -610,6 +685,8 @@ new_device(int vid)
 	_rte_eth_dev_callback_process(eth_dev, RTE_ETH_EVENT_INTR_LSC,
 				      NULL, NULL);
 
+	share_device(vid);
+
 	return 0;
 }
 
@@ -1025,13 +1102,6 @@ eth_dev_vhost_create(struct rte_vdev_device *dev, char *iface_name,
 	RTE_LOG(INFO, PMD, "Creating VHOST-USER backend on numa socket %u\n",
 		numa_node);
 
-	/* now do all data allocation - for eth_dev structure and internal
-	 * (private) data
-	 */
-	data = rte_zmalloc_socket(name, sizeof(*data), 0, numa_node);
-	if (data == NULL)
-		goto error;
-
 	list = rte_zmalloc_socket(name, sizeof(*list), 0, numa_node);
 	if (list == NULL)
 		goto error;
@@ -1073,11 +1143,7 @@ eth_dev_vhost_create(struct rte_vdev_device *dev, char *iface_name,
 	rte_spinlock_init(&vring_state->lock);
 	vring_states[eth_dev->data->port_id] = vring_state;
 
-	/* We'll replace the 'data' originally allocated by eth_dev. So the
-	 * vhost PMD resources won't be shared between multi processes.
-	 */
-	rte_memcpy(data, eth_dev->data, sizeof(*data));
-	eth_dev->data = data;
+	data = eth_dev->data;
 	data->nb_rx_queues = queues;
 	data->nb_tx_queues = queues;
 
@@ -1125,6 +1191,30 @@ eth_dev_vhost_create(struct rte_vdev_device *dev, char *iface_name,
 	return -1;
 }
 
+static int
+eth_dev_vhost_attach(struct rte_vdev_device *dev)
+{
+	struct rte_eth_dev *eth_dev = NULL;
+	struct rte_eth_dev_data *data = NULL;
+
+	RTE_LOG(INFO, PMD, "Attach vhost user port\n");
+
+	/* reserve an ethdev entry */
+	eth_dev = rte_eth_vdev_allocate(dev, sizeof(struct pmd_internal));
+	if (eth_dev == NULL)
+		return -1;
+
+	eth_dev->dev_ops = &ops;
+
+	/* finally assign rx and tx ops */
+	eth_dev->rx_pkt_burst = eth_vhost_rx;
+	eth_dev->tx_pkt_burst = eth_vhost_tx;
+
+	data = eth_dev->data;
+
+	return data->port_id;
+}
+
 static inline int
 open_iface(const char *key __rte_unused, const char *value, void *extra_args)
 {
@@ -1154,10 +1244,84 @@ open_int(const char *key __rte_unused, const char *value, void *extra_args)
 }
 
 static int
+vhost_pmd_action(const void *params, int len, int fds[], int fds_num)
+{
+	int i;
+	int vid;
+	void *base_addr;
+	const struct vhost_params *p = params;
+	const struct rte_vhost_mem_region *regions;
+
+	if (len < (int)sizeof(*p)) {
+		RTE_LOG(ERR, PMD, "message is too short\n");
+		return -1;
+	}
+
+	switch (p->type) {
+	case VHOST_MSG_TYPE_REGIONS:
+		regions = p->regions;
+		for (i = 0; i < fds_num; ++i) {
+			base_addr = mmap(regions[i].mmap_addr,
+					regions[i].mmap_size,
+					PROT_READ | PROT_WRITE,
+					MAP_FIXED | MAP_SHARED, fds[i], 0);
+			if (base_addr != regions[i].mmap_addr) {
+				RTE_LOG(ERR, PMD,
+					"vhost in secondary mmap error: %s\n",
+					strerror(errno));
+				break;
+			}
+		}
+		break;
+	case VHOST_MSG_TYPE_SET_FDS:
+		rte_vhost_set_vring_effective_fd(p->vid,
+				p->vring_idx,
+				fds[0], fds[1]);
+		break;
+	case VHOST_MSG_TYPE_INIT:
+		vid = rte_eth_vhost_get_vid_from_port_id(p->portid);
+		share_device(vid);
+		break;
+	}
+
+	return 0;
+}
+
+static int
+probe_secondary(struct rte_vdev_device *dev)
+{
+	int portid = eth_dev_vhost_attach(dev);
+	struct rte_eth_dev *eth_dev;
+	struct pmd_internal *internal;
+	struct vhost_params p;
+
+	if (portid < 0)
+		return -1;
+
+	eth_dev = &rte_eth_devices[portid];
+	internal = eth_dev->data->dev_private;
+
+	if (!internal ||
+	    rte_atomic32_read(&internal->dev_attached) == 0) {
+		RTE_LOG(INFO, PMD, "%s is not ready\n", dev->device.name);
+		return 0;
+	}
+
+	p.type = VHOST_MSG_TYPE_INIT;
+	p.portid = portid;
+	if (rte_eal_mp_sendmsg("vhost pmd", &p, sizeof(p), NULL, 0) < 0) {
+		RTE_LOG(ERR, PMD, "Failed to send request for init\n");
+		return -1;
+	}
+
+	return 0;
+}
+
+static int
 rte_pmd_vhost_probe(struct rte_vdev_device *dev)
 {
 	struct rte_kvargs *kvlist = NULL;
-	int ret = 0;
+	int ret;
 	char *iface_name;
 	uint16_t queues;
 	uint64_t flags = 0;
@@ -1167,6 +1331,15 @@ rte_pmd_vhost_probe(struct rte_vdev_device *dev)
 	RTE_LOG(INFO, PMD, "Initializing pmd_vhost for %s\n",
 		rte_vdev_device_name(dev));
 
+	ret = rte_eal_mp_action_register("vhost pmd", vhost_pmd_action);
+	if (ret < 0 && ret != -EEXIST) {
+		RTE_LOG(ERR, PMD, "vhost fails to add action\n");
+		return -1;
+	}
+
+	if (rte_eal_process_type() == RTE_PROC_SECONDARY)
+		return probe_secondary(dev);
+
 	kvlist = rte_kvargs_parse(rte_vdev_device_args(dev), valid_arguments);
 	if (kvlist == NULL)
 		return -1;
@@ -1216,6 +1389,7 @@ rte_pmd_vhost_probe(struct rte_vdev_device *dev)
 	eth_dev_vhost_create(dev, iface_name, queues, dev->device.numa_node,
 			     flags);
 
+	ret = 0;
 out_free:
 	rte_kvargs_free(kvlist);
 	return ret;
-- 
2.7.4
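
For reference, a minimal sketch of the primary/secondary message flow this
patch builds on. The rte_eal_mp_action_register()/rte_eal_mp_sendmsg()
helpers come from earlier patches in this series; their declaring header is
assumed here, and the "demo" names below are illustrative only, not part of
the patch.

/*
 * Illustration only: both processes register an action under the same
 * name; a message sent with that name is delivered to the peer's
 * callback together with any file descriptors passed over the channel.
 */
#include <rte_eal.h>	/* assumed to declare the rte_eal_mp_* helpers */
#include <rte_log.h>
#include <rte_common.h>

struct demo_msg {
	int type;
};

/* Runs in the peer process when a "demo" message arrives. */
static int
demo_action(const void *params, int len, int fds[] __rte_unused,
	    int fds_num)
{
	const struct demo_msg *m = params;

	if (len < (int)sizeof(*m))
		return -1;

	RTE_LOG(INFO, USER1, "demo msg: type %d, %d fds\n", m->type, fds_num);
	return 0;
}

static int
demo_notify_peer(void)
{
	struct demo_msg m = { .type = 1 };
	int ret;

	/* Register in both primary and secondary, as the PMD does in probe. */
	ret = rte_eal_mp_action_register("demo", demo_action);
	if (ret < 0 && ret != -EEXIST)
		return -1;

	/* No fds attached here; the vhost PMD attaches region and vring fds. */
	return rte_eal_mp_sendmsg("demo", &m, sizeof(m), NULL, 0);
}

The vhost PMD follows this pattern: the primary pushes the guest memory
table and the per-vring kick/call fds with rte_eal_mp_sendmsg(), and the
secondary's vhost_pmd_action() maps the regions and installs the fds via
rte_vhost_set_vring_effective_fd().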