From mboxrd@z Thu Jan 1 00:00:00 1970 From: Shahaf Shuler Subject: Re: [PATCH 05/14] net/mlx5: add multiport IB device support to probing Date: Thu, 21 Mar 2019 12:14:39 +0000 Message-ID: References: <1551376985-11096-1-git-send-email-viacheslavo@mellanox.com> <1553155888-27498-1-git-send-email-viacheslavo@mellanox.com> <1553155888-27498-6-git-send-email-viacheslavo@mellanox.com> Mime-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: quoted-printable To: Slava Ovsiienko , "dev@dpdk.org" Return-path: Received: from EUR01-VE1-obe.outbound.protection.outlook.com (mail-eopbgr140080.outbound.protection.outlook.com [40.107.14.80]) by dpdk.org (Postfix) with ESMTP id 0ED8E1B4B6 for ; Thu, 21 Mar 2019 13:14:41 +0100 (CET) In-Reply-To: <1553155888-27498-6-git-send-email-viacheslavo@mellanox.com> Content-Language: en-US List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" Thursday, March 21, 2019 10:11 AM, Viacheslav Ovsiienko: > Subject: [PATCH 05/14] net/mlx5: add multiport IB device support to probi= ng >=20 > mlx5_pci_probe() routine is refactored to probe the ports of found > Infiniband devices. All active ports (with attached network interface), > belonging to the same Infiniband device will use the signle shared Infini= band > context of that device. >=20 > Signed-off-by: Viacheslav Ovsiienko > --- > drivers/net/mlx5/mlx5.c | 302 +++++++++++++++++++++++++++++++++--- > ------------ > 1 file changed, 210 insertions(+), 92 deletions(-) >=20 > diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c index > 89c30af..100e9f4 100644 > --- a/drivers/net/mlx5/mlx5.c > +++ b/drivers/net/mlx5/mlx5.c > @@ -130,6 +130,16 @@ > /** Driver-specific log messages type. */ int mlx5_logtype; >=20 > +/** Data associated with devices to spawn. */ struct > +mlx5_dev_spawn_data { > + uint32_t ifindex; /**< Network interface index. */ > + uint32_t max_port; /**< IB device maximal port index. */ > + uint32_t ibv_port; /**< IB device physical port index. */ > + struct mlx5_switch_info info; /**< Switch information. */ > + struct ibv_device *ibv_dev; /**< Associated IB device. */ > + struct rte_eth_dev *eth_dev; /**< Associated Ethernet device. */ }; > + > /** > * Prepare shared data between primary and secondary process. > */ > @@ -716,12 +726,10 @@ > * > * @param dpdk_dev > * Backing DPDK device. > - * @param ibv_dev > - * Verbs device. > + * @param spawn > + * Verbs device parameters (name, port, switch_info) to spawn. > * @param config > * Device configuration parameters. > - * @param[in] switch_info > - * Switch properties of Ethernet device. > * > * @return > * A valid Ethernet device object on success, NULL otherwise and rte_e= rrno > @@ -732,10 +740,11 @@ > */ > static struct rte_eth_dev * > mlx5_dev_spawn(struct rte_device *dpdk_dev, > - struct ibv_device *ibv_dev, > - struct mlx5_dev_config config, > - const struct mlx5_switch_info *switch_info) > + struct mlx5_dev_spawn_data *spawn, > + struct mlx5_dev_config config) > { > + const struct mlx5_switch_info *switch_info =3D &spawn->info; > + struct ibv_device *ibv_dev =3D spawn->ibv_dev; > struct ibv_context *ctx =3D NULL; > struct ibv_device_attr_ex attr; > struct ibv_port_attr port_attr; > @@ -952,7 +961,7 @@ > return eth_dev; > } > /* Check port status. */ > - err =3D mlx5_glue->query_port(ctx, 1, &port_attr); > + err =3D mlx5_glue->query_port(ctx, spawn->ibv_port, &port_attr); > if (err) { > DRV_LOG(ERR, "port query failed: %s", strerror(err)); > goto error; > @@ -1316,14 +1325,6 @@ > return NULL; > } >=20 > -/** Data associated with devices to spawn. */ -struct > mlx5_dev_spawn_data { > - unsigned int ifindex; /**< Network interface index. */ > - struct mlx5_switch_info info; /**< Switch information. */ > - struct ibv_device *ibv_dev; /**< Associated IB device. */ > - struct rte_eth_dev *eth_dev; /**< Associated Ethernet device. */ > -}; > - > /** > * Comparison callback to sort device data. > * > @@ -1380,7 +1381,9 @@ struct mlx5_dev_spawn_data { > struct rte_pci_device *pci_dev) { > struct ibv_device **ibv_list; > - unsigned int n =3D 0; > + unsigned int nd =3D 0; > + unsigned int np =3D 0; > + unsigned int ns =3D 0; This fields names are not informative. Find a better ones.=20 > struct mlx5_dev_config dev_config; > int ret; >=20 > @@ -1392,8 +1395,14 @@ struct mlx5_dev_spawn_data { > DRV_LOG(ERR, "cannot list devices, is ib_uverbs loaded?"); > return -rte_errno; > } > - > + /* > + * First scan the list of all Infiniband devices to find > + * matching ones, gathering into the list. > + */ > struct ibv_device *ibv_match[ret + 1]; > + int nl_route =3D -1; > + int nl_rdma =3D -1; > + unsigned int i; >=20 > while (ret-- > 0) { > struct rte_pci_addr pci_addr; > @@ -1408,77 +1417,183 @@ struct mlx5_dev_spawn_data { > continue; > DRV_LOG(INFO, "PCI information matches for device > \"%s\"", > ibv_list[ret]->name); > - ibv_match[n++] =3D ibv_list[ret]; > + ibv_match[nd++] =3D ibv_list[ret]; > + } > + ibv_match[nd] =3D NULL; > + if (!nd) { > + /* No device macthes, just complain and bail out. */ > + mlx5_glue->free_device_list(ibv_list); > + DRV_LOG(WARNING, > + "no Verbs device matches PCI device " PCI_PRI_FMT > "," > + " are kernel drivers loaded?", > + pci_dev->addr.domain, pci_dev->addr.bus, > + pci_dev->addr.devid, pci_dev->addr.function); > + rte_errno =3D ENOENT; > + ret =3D -rte_errno; > + return ret; > + } > + nl_route =3D mlx5_nl_init(NETLINK_ROUTE); > + nl_rdma =3D mlx5_nl_init(NETLINK_RDMA); > + if (nd =3D=3D 1) { > + /* > + * Found single matching device may have multiple ports. > + * Each port may be representor, we have to check the port > + * number and check the representors existence. > + */ > + if (nl_rdma >=3D 0) > + np =3D mlx5_nl_portnum(nl_rdma, ibv_match[0]- > >name); > + if (!np) > + DRV_LOG(WARNING, "can not get IB device \"%s\"" > + " ports number", ibv_match[0]- > >name); This warning is misleading. On old kernels it is expected to have multiple = IB devices instead of a single one w/ multiple ports. The level should be changed for debug, and the syntax to express it is not = an error.=20 > } > - ibv_match[n] =3D NULL; > - > - struct mlx5_dev_spawn_data list[n]; > - int nl_route =3D n ? mlx5_nl_init(NETLINK_ROUTE) : -1; > - int nl_rdma =3D n ? mlx5_nl_init(NETLINK_RDMA) : -1; > - unsigned int i; > - unsigned int u; > - > /* > - * The existence of several matching entries (n > 1) means port > - * representors have been instantiated. No existing Verbs call nor > - * /sys entries can tell them apart, this can only be done through > - * Netlink calls assuming kernel drivers are recent enough to > - * support them. > - * > - * In the event of identification failure through Netlink, try again > - * through sysfs, then either: > - * > - * 1. No device matches (n =3D=3D 0), complain and bail out. > - * 2. A single IB device matches (n =3D=3D 1) and is not a representor, > - * assume no switch support. > - * 3. Otherwise no safe assumptions can be made; complain louder > and > - * bail out. > + * Now we can determine the maximal > + * amount of devices to be spawned. > */ > - for (i =3D 0; i !=3D n; ++i) { > - list[i].ibv_dev =3D ibv_match[i]; > - list[i].eth_dev =3D NULL; > - if (nl_rdma < 0) > - list[i].ifindex =3D 0; > - else > - list[i].ifindex =3D mlx5_nl_ifindex > - (nl_rdma, list[i].ibv_dev->name, 1); > - if (nl_route < 0 || > - !list[i].ifindex || > - mlx5_nl_switch_info(nl_route, list[i].ifindex, > - &list[i].info) || > - ((!list[i].info.representor && !list[i].info.master) && > - mlx5_sysfs_switch_info(list[i].ifindex, &list[i].info))) { > - list[i].ifindex =3D 0; > - memset(&list[i].info, 0, sizeof(list[i].info)); > - continue; > + struct mlx5_dev_spawn_data list[np ? np : nd]; > + > + if (np > 1) { > + /* > + * Signle IB device with multiple ports found, > + * it may be E-Switch master device and representors. > + * We have to perform identification trough the ports. > + */ > + assert(nl_rdma >=3D 0); > + assert(ns =3D=3D 0); > + assert(nd =3D=3D 1); > + for (i =3D 1; i <=3D np; ++i) { > + list[ns].max_port =3D np; > + list[ns].ibv_port =3D i; > + list[ns].ibv_dev =3D ibv_match[0]; > + list[ns].eth_dev =3D NULL; > + list[ns].ifindex =3D mlx5_nl_ifindex > + (nl_rdma, list[ns].ibv_dev->name, i); > + if (!list[ns].ifindex) { > + /* > + * No network interface index found for the > + * specified port, it means there is no > + * representor on this port. It's OK, > + * there can be disabled ports, for example > + * if sriov_numvfs < sriov_totalvfs. > + */ > + continue; > + } > + ret =3D -1; > + if (nl_route >=3D 0) > + ret =3D mlx5_nl_switch_info > + (nl_route, > + list[ns].ifindex, > + &list[ns].info); > + if (ret || (!list[ns].info.representor && > + !list[ns].info.master)) { > + /* > + * We failed to recognize representors with > + * Netlink, let's try to perform the task > + * with sysfs. > + */ > + ret =3D mlx5_sysfs_switch_info > + (list[ns].ifindex, > + &list[ns].info); > + } > + if (!ret && (list[ns].info.representor ^ > + list[ns].info.master)) > + ns++; > } > - } > - if (nl_rdma >=3D 0) > - close(nl_rdma); > - if (nl_route >=3D 0) > - close(nl_route); > - /* Count unidentified devices. */ > - for (u =3D 0, i =3D 0; i !=3D n; ++i) > - if (!list[i].info.master && !list[i].info.representor) > - ++u; > - if (u) { > - if (n =3D=3D 1 && u =3D=3D 1) { > - /* Case #2. */ > - DRV_LOG(INFO, "no switch support detected"); > - } else { > - /* Case #3. */ > + if (!ns) { > + DRV_LOG(ERR, > + "unable to recognize master/representors" > + " on the IB device with multiple ports"); > + rte_errno =3D ENOENT; > + ret =3D -rte_errno; > + goto exit; > + } > + } else { > + /* > + * The existence of several matching entries (nd > 1) means > + * port representors have been instantiated. No existing > Verbs > + * call nor sysfs entries can tell them apart, this can only > + * be done through Netlink calls assuming kernel drivers are > + * recent enough to support them. > + * > + * In the event of identification failure through Netlink, > + * try again through sysfs, then: > + * > + * 1. A single IB device matches (nd =3D=3D 1) with single > + * port (np=3D0/1) and is not a representor, assume > + * no switch support. > + * > + * 2. Otherwise no safe assumptions can be made; > + * complain louder and bail out. > + */ > + np =3D 1; > + for (i =3D 0; i !=3D nd; ++i) { > + memset(&list[ns].info, 0, sizeof(list[ns].info)); > + list[ns].max_port =3D 1; > + list[ns].ibv_port =3D 1; > + list[ns].ibv_dev =3D ibv_match[i]; > + list[ns].eth_dev =3D NULL; > + list[ns].ifindex =3D 0; > + if (nl_rdma >=3D 0) > + list[ns].ifindex =3D mlx5_nl_ifindex > + (nl_rdma, list[ns].ibv_dev->name, 1); > + if (!list[ns].ifindex) { > + /* > + * No network interface index found for the > + * specified device, it means there it is not > + * a representor/master. > + */ > + continue; > + } > + ret =3D -1; > + if (nl_route >=3D 0) > + ret =3D mlx5_nl_switch_info > + (nl_route, > + list[ns].ifindex, > + &list[ns].info); > + if (ret || (!list[ns].info.representor && > + !list[ns].info.master)) { > + /* > + * We failed to recognize representors with > + * Netlink, let's try to perform the task > + * with sysfs. > + */ > + ret =3D mlx5_sysfs_switch_info > + (list[ns].ifindex, > + &list[ns].info); > + } > + if (!ret && (list[ns].info.representor ^ > + list[ns].info.master)) { > + ns++; > + } else if ((nd =3D=3D 1) && > + !list[ns].info.representor && > + !list[ns].info.master) { > + /* > + * Single IB device with > + * one physical port and > + * attached network device. > + * May be SRIOV is not enabled > + * or there is no representors. > + */ > + DRV_LOG(INFO, "no E-Switch support > detected"); > + ns++; > + break; > + } > + } > + if (!ns) { > DRV_LOG(ERR, > - "unable to tell which of the matching > devices" > - " is the master (lack of kernel support?)"); > - n =3D 0; > + "unable to recognize master/representors" > + " on the multiple IB devices"); > + rte_errno =3D ENOENT; > + ret =3D -rte_errno; > + goto exit; > } > } > + assert(ns); > /* > * Sort list to probe devices in natural order for users convenience > * (i.e. master first, then representors from lowest to highest ID). > */ > - if (n) > - qsort(list, n, sizeof(*list), mlx5_dev_spawn_data_cmp); > + qsort(list, ns, sizeof(*list), mlx5_dev_spawn_data_cmp); > /* Default configuration. */ > dev_config =3D (struct mlx5_dev_config){ > .hw_padding =3D 0, > @@ -1497,7 +1612,7 @@ struct mlx5_dev_spawn_data { > .min_rxqs_num =3D MLX5_MPRQ_MIN_RXQS, > }, > }; > - /* Device speicific configuration. */ > + /* Device specific configuration. */ > switch (pci_dev->id.device_id) { > case PCI_DEVICE_ID_MELLANOX_CONNECTX5BF: > dev_config.txqs_vec =3D > MLX5_VPMD_MAX_TXQS_BLUEFIELD; @@ -1514,12 +1629,12 @@ struct > mlx5_dev_spawn_data { > /* Set architecture-dependent default value if unset. */ > if (dev_config.txqs_vec =3D=3D MLX5_ARG_UNSET) > dev_config.txqs_vec =3D MLX5_VPMD_MAX_TXQS; > - for (i =3D 0; i !=3D n; ++i) { > + for (i =3D 0; i !=3D ns; ++i) { > uint32_t restore; >=20 > list[i].eth_dev =3D mlx5_dev_spawn(&pci_dev->device, > - list[i].ibv_dev, dev_config, > - &list[i].info); > + &list[i], > + dev_config); > if (!list[i].eth_dev) { > if (rte_errno !=3D EBUSY && rte_errno !=3D EEXIST) > break; > @@ -1532,16 +1647,7 @@ struct mlx5_dev_spawn_data { > list[i].eth_dev->data->dev_flags |=3D restore; > rte_eth_dev_probing_finish(list[i].eth_dev); > } > - mlx5_glue->free_device_list(ibv_list); > - if (!n) { > - DRV_LOG(WARNING, > - "no Verbs device matches PCI device " PCI_PRI_FMT > "," > - " are kernel drivers loaded?", > - pci_dev->addr.domain, pci_dev->addr.bus, > - pci_dev->addr.devid, pci_dev->addr.function); > - rte_errno =3D ENOENT; > - ret =3D -rte_errno; > - } else if (i !=3D n) { > + if (i !=3D ns) { > DRV_LOG(ERR, > "probe of PCI device " PCI_PRI_FMT " aborted after" > " encountering an error: %s", > @@ -1563,6 +1669,18 @@ struct mlx5_dev_spawn_data { > } else { > ret =3D 0; > } > +exit: > + /* > + * Do the routine cleanup: > + * - close opened Netlink sockets > + * - free the Infiniband device list > + */ > + if (nl_rdma >=3D 0) > + close(nl_rdma); > + if (nl_route >=3D 0) > + close(nl_route); > + assert(ibv_list); > + mlx5_glue->free_device_list(ibv_list); > return ret; > } >=20 > -- > 1.8.3.1