From mboxrd@z Thu Jan 1 00:00:00 1970 From: Slava Ovsiienko Subject: Re: [PATCH 05/14] net/mlx5: add multiport IB device support to probing Date: Thu, 21 Mar 2019 12:54:19 +0000 Message-ID: References: <1551376985-11096-1-git-send-email-viacheslavo@mellanox.com> <1553155888-27498-1-git-send-email-viacheslavo@mellanox.com> <1553155888-27498-6-git-send-email-viacheslavo@mellanox.com> Mime-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: quoted-printable To: Shahaf Shuler , "dev@dpdk.org" Return-path: Received: from EUR02-HE1-obe.outbound.protection.outlook.com (mail-eopbgr10062.outbound.protection.outlook.com [40.107.1.62]) by dpdk.org (Postfix) with ESMTP id 96C9F1B4B9 for ; Thu, 21 Mar 2019 13:54:22 +0100 (CET) In-Reply-To: Content-Language: en-US List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" > -----Original Message----- > From: Shahaf Shuler > Sent: Thursday, March 21, 2019 14:15 > To: Slava Ovsiienko ; dev@dpdk.org > Subject: RE: [PATCH 05/14] net/mlx5: add multiport IB device support to > probing >=20 > Thursday, March 21, 2019 10:11 AM, Viacheslav Ovsiienko: > > Subject: [PATCH 05/14] net/mlx5: add multiport IB device support to > > probing > > > > mlx5_pci_probe() routine is refactored to probe the ports of found > > Infiniband devices. All active ports (with attached network > > interface), belonging to the same Infiniband device will use the > > signle shared Infiniband context of that device. > > > > Signed-off-by: Viacheslav Ovsiienko > > --- > > drivers/net/mlx5/mlx5.c | 302 +++++++++++++++++++++++++++++++++--- > > ------------ > > 1 file changed, 210 insertions(+), 92 deletions(-) > > > > diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c index > > 89c30af..100e9f4 100644 > > --- a/drivers/net/mlx5/mlx5.c > > +++ b/drivers/net/mlx5/mlx5.c > > @@ -130,6 +130,16 @@ > > /** Driver-specific log messages type. */ int mlx5_logtype; > > > > +/** Data associated with devices to spawn. */ struct > > +mlx5_dev_spawn_data { > > + uint32_t ifindex; /**< Network interface index. */ > > + uint32_t max_port; /**< IB device maximal port index. */ > > + uint32_t ibv_port; /**< IB device physical port index. */ > > + struct mlx5_switch_info info; /**< Switch information. */ > > + struct ibv_device *ibv_dev; /**< Associated IB device. */ > > + struct rte_eth_dev *eth_dev; /**< Associated Ethernet device. */ }; > > + > > /** > > * Prepare shared data between primary and secondary process. > > */ > > @@ -716,12 +726,10 @@ > > * > > * @param dpdk_dev > > * Backing DPDK device. > > - * @param ibv_dev > > - * Verbs device. > > + * @param spawn > > + * Verbs device parameters (name, port, switch_info) to spawn. > > * @param config > > * Device configuration parameters. > > - * @param[in] switch_info > > - * Switch properties of Ethernet device. > > * > > * @return > > * A valid Ethernet device object on success, NULL otherwise and > rte_errno > > @@ -732,10 +740,11 @@ > > */ > > static struct rte_eth_dev * > > mlx5_dev_spawn(struct rte_device *dpdk_dev, > > - struct ibv_device *ibv_dev, > > - struct mlx5_dev_config config, > > - const struct mlx5_switch_info *switch_info) > > + struct mlx5_dev_spawn_data *spawn, > > + struct mlx5_dev_config config) > > { > > + const struct mlx5_switch_info *switch_info =3D &spawn->info; > > + struct ibv_device *ibv_dev =3D spawn->ibv_dev; > > struct ibv_context *ctx =3D NULL; > > struct ibv_device_attr_ex attr; > > struct ibv_port_attr port_attr; > > @@ -952,7 +961,7 @@ > > return eth_dev; > > } > > /* Check port status. */ > > - err =3D mlx5_glue->query_port(ctx, 1, &port_attr); > > + err =3D mlx5_glue->query_port(ctx, spawn->ibv_port, &port_attr); > > if (err) { > > DRV_LOG(ERR, "port query failed: %s", strerror(err)); > > goto error; > > @@ -1316,14 +1325,6 @@ > > return NULL; > > } > > > > -/** Data associated with devices to spawn. */ -struct > > mlx5_dev_spawn_data { > > - unsigned int ifindex; /**< Network interface index. */ > > - struct mlx5_switch_info info; /**< Switch information. */ > > - struct ibv_device *ibv_dev; /**< Associated IB device. */ > > - struct rte_eth_dev *eth_dev; /**< Associated Ethernet device. */ > > -}; > > - > > /** > > * Comparison callback to sort device data. > > * > > @@ -1380,7 +1381,9 @@ struct mlx5_dev_spawn_data { > > struct rte_pci_device *pci_dev) { > > struct ibv_device **ibv_list; > > - unsigned int n =3D 0; > > + unsigned int nd =3D 0; > > + unsigned int np =3D 0; > > + unsigned int ns =3D 0; >=20 > This fields names are not informative. Find a better ones. Would the adding clarifying comments be enough ? nd - Number of (PCI) Devices (nd !=3D 1 means we have multiple devices wi= th the same BDF - old schema) np - Number of (device) Ports (nd =3D1, np 1...n means we have new multipor= t device) ns - Number to Spawn (deduced index - number of iterations) This names are used as indices, long names may make code less readable, IMH= O. >=20 > > struct mlx5_dev_config dev_config; > > int ret; > > > > @@ -1392,8 +1395,14 @@ struct mlx5_dev_spawn_data { > > DRV_LOG(ERR, "cannot list devices, is ib_uverbs loaded?"); > > return -rte_errno; > > } > > - > > + /* > > + * First scan the list of all Infiniband devices to find > > + * matching ones, gathering into the list. > > + */ > > struct ibv_device *ibv_match[ret + 1]; > > + int nl_route =3D -1; > > + int nl_rdma =3D -1; > > + unsigned int i; > > > > while (ret-- > 0) { > > struct rte_pci_addr pci_addr; > > @@ -1408,77 +1417,183 @@ struct mlx5_dev_spawn_data { > > continue; > > DRV_LOG(INFO, "PCI information matches for device \"%s\"", > > ibv_list[ret]->name); > > - ibv_match[n++] =3D ibv_list[ret]; > > + ibv_match[nd++] =3D ibv_list[ret]; > > + } > > + ibv_match[nd] =3D NULL; > > + if (!nd) { > > + /* No device macthes, just complain and bail out. */ > > + mlx5_glue->free_device_list(ibv_list); > > + DRV_LOG(WARNING, > > + "no Verbs device matches PCI device " PCI_PRI_FMT > > "," > > + " are kernel drivers loaded?", > > + pci_dev->addr.domain, pci_dev->addr.bus, > > + pci_dev->addr.devid, pci_dev->addr.function); > > + rte_errno =3D ENOENT; > > + ret =3D -rte_errno; > > + return ret; > > + } > > + nl_route =3D mlx5_nl_init(NETLINK_ROUTE); > > + nl_rdma =3D mlx5_nl_init(NETLINK_RDMA); > > + if (nd =3D=3D 1) { > > + /* > > + * Found single matching device may have multiple ports. > > + * Each port may be representor, we have to check the port > > + * number and check the representors existence. > > + */ > > + if (nl_rdma >=3D 0) > > + np =3D mlx5_nl_portnum(nl_rdma, ibv_match[0]- > > >name); > > + if (!np) > > + DRV_LOG(WARNING, "can not get IB device \"%s\"" > > + " ports number", ibv_match[0]- > > >name); >=20 > This warning is misleading. On old kernels it is expected to have multipl= e IB > devices instead of a single one w/ multiple ports. > The level should be changed for debug, and the syntax to express it is no= t an > error. >=20 > > } > > - ibv_match[n] =3D NULL; > > - > > - struct mlx5_dev_spawn_data list[n]; > > - int nl_route =3D n ? mlx5_nl_init(NETLINK_ROUTE) : -1; > > - int nl_rdma =3D n ? mlx5_nl_init(NETLINK_RDMA) : -1; > > - unsigned int i; > > - unsigned int u; > > - > > /* > > - * The existence of several matching entries (n > 1) means port > > - * representors have been instantiated. No existing Verbs call nor > > - * /sys entries can tell them apart, this can only be done through > > - * Netlink calls assuming kernel drivers are recent enough to > > - * support them. > > - * > > - * In the event of identification failure through Netlink, try again > > - * through sysfs, then either: > > - * > > - * 1. No device matches (n =3D=3D 0), complain and bail out. > > - * 2. A single IB device matches (n =3D=3D 1) and is not a represento= r, > > - * assume no switch support. > > - * 3. Otherwise no safe assumptions can be made; complain louder > > and > > - * bail out. > > + * Now we can determine the maximal > > + * amount of devices to be spawned. > > */ > > - for (i =3D 0; i !=3D n; ++i) { > > - list[i].ibv_dev =3D ibv_match[i]; > > - list[i].eth_dev =3D NULL; > > - if (nl_rdma < 0) > > - list[i].ifindex =3D 0; > > - else > > - list[i].ifindex =3D mlx5_nl_ifindex > > - (nl_rdma, list[i].ibv_dev->name, 1); > > - if (nl_route < 0 || > > - !list[i].ifindex || > > - mlx5_nl_switch_info(nl_route, list[i].ifindex, > > - &list[i].info) || > > - ((!list[i].info.representor && !list[i].info.master) && > > - mlx5_sysfs_switch_info(list[i].ifindex, &list[i].info))) { > > - list[i].ifindex =3D 0; > > - memset(&list[i].info, 0, sizeof(list[i].info)); > > - continue; > > + struct mlx5_dev_spawn_data list[np ? np : nd]; > > + > > + if (np > 1) { > > + /* > > + * Signle IB device with multiple ports found, > > + * it may be E-Switch master device and representors. > > + * We have to perform identification trough the ports. > > + */ > > + assert(nl_rdma >=3D 0); > > + assert(ns =3D=3D 0); > > + assert(nd =3D=3D 1); > > + for (i =3D 1; i <=3D np; ++i) { > > + list[ns].max_port =3D np; > > + list[ns].ibv_port =3D i; > > + list[ns].ibv_dev =3D ibv_match[0]; > > + list[ns].eth_dev =3D NULL; > > + list[ns].ifindex =3D mlx5_nl_ifindex > > + (nl_rdma, list[ns].ibv_dev->name, i); > > + if (!list[ns].ifindex) { > > + /* > > + * No network interface index found for the > > + * specified port, it means there is no > > + * representor on this port. It's OK, > > + * there can be disabled ports, for example > > + * if sriov_numvfs < sriov_totalvfs. > > + */ > > + continue; > > + } > > + ret =3D -1; > > + if (nl_route >=3D 0) > > + ret =3D mlx5_nl_switch_info > > + (nl_route, > > + list[ns].ifindex, > > + &list[ns].info); > > + if (ret || (!list[ns].info.representor && > > + !list[ns].info.master)) { > > + /* > > + * We failed to recognize representors with > > + * Netlink, let's try to perform the task > > + * with sysfs. > > + */ > > + ret =3D mlx5_sysfs_switch_info > > + (list[ns].ifindex, > > + &list[ns].info); > > + } > > + if (!ret && (list[ns].info.representor ^ > > + list[ns].info.master)) > > + ns++; > > } > > - } > > - if (nl_rdma >=3D 0) > > - close(nl_rdma); > > - if (nl_route >=3D 0) > > - close(nl_route); > > - /* Count unidentified devices. */ > > - for (u =3D 0, i =3D 0; i !=3D n; ++i) > > - if (!list[i].info.master && !list[i].info.representor) > > - ++u; > > - if (u) { > > - if (n =3D=3D 1 && u =3D=3D 1) { > > - /* Case #2. */ > > - DRV_LOG(INFO, "no switch support detected"); > > - } else { > > - /* Case #3. */ > > + if (!ns) { > > + DRV_LOG(ERR, > > + "unable to recognize master/representors" > > + " on the IB device with multiple ports"); > > + rte_errno =3D ENOENT; > > + ret =3D -rte_errno; > > + goto exit; > > + } > > + } else { > > + /* > > + * The existence of several matching entries (nd > 1) means > > + * port representors have been instantiated. No existing > > Verbs > > + * call nor sysfs entries can tell them apart, this can only > > + * be done through Netlink calls assuming kernel drivers are > > + * recent enough to support them. > > + * > > + * In the event of identification failure through Netlink, > > + * try again through sysfs, then: > > + * > > + * 1. A single IB device matches (nd =3D=3D 1) with single > > + * port (np=3D0/1) and is not a representor, assume > > + * no switch support. > > + * > > + * 2. Otherwise no safe assumptions can be made; > > + * complain louder and bail out. > > + */ > > + np =3D 1; > > + for (i =3D 0; i !=3D nd; ++i) { > > + memset(&list[ns].info, 0, sizeof(list[ns].info)); > > + list[ns].max_port =3D 1; > > + list[ns].ibv_port =3D 1; > > + list[ns].ibv_dev =3D ibv_match[i]; > > + list[ns].eth_dev =3D NULL; > > + list[ns].ifindex =3D 0; > > + if (nl_rdma >=3D 0) > > + list[ns].ifindex =3D mlx5_nl_ifindex > > + (nl_rdma, list[ns].ibv_dev->name, 1); > > + if (!list[ns].ifindex) { > > + /* > > + * No network interface index found for the > > + * specified device, it means there it is not > > + * a representor/master. > > + */ > > + continue; > > + } > > + ret =3D -1; > > + if (nl_route >=3D 0) > > + ret =3D mlx5_nl_switch_info > > + (nl_route, > > + list[ns].ifindex, > > + &list[ns].info); > > + if (ret || (!list[ns].info.representor && > > + !list[ns].info.master)) { > > + /* > > + * We failed to recognize representors with > > + * Netlink, let's try to perform the task > > + * with sysfs. > > + */ > > + ret =3D mlx5_sysfs_switch_info > > + (list[ns].ifindex, > > + &list[ns].info); > > + } > > + if (!ret && (list[ns].info.representor ^ > > + list[ns].info.master)) { > > + ns++; > > + } else if ((nd =3D=3D 1) && > > + !list[ns].info.representor && > > + !list[ns].info.master) { > > + /* > > + * Single IB device with > > + * one physical port and > > + * attached network device. > > + * May be SRIOV is not enabled > > + * or there is no representors. > > + */ > > + DRV_LOG(INFO, "no E-Switch support > > detected"); > > + ns++; > > + break; > > + } > > + } > > + if (!ns) { > > DRV_LOG(ERR, > > - "unable to tell which of the matching > > devices" > > - " is the master (lack of kernel support?)"); > > - n =3D 0; > > + "unable to recognize master/representors" > > + " on the multiple IB devices"); > > + rte_errno =3D ENOENT; > > + ret =3D -rte_errno; > > + goto exit; > > } > > } > > + assert(ns); > > /* > > * Sort list to probe devices in natural order for users convenience > > * (i.e. master first, then representors from lowest to highest ID). > > */ > > - if (n) > > - qsort(list, n, sizeof(*list), mlx5_dev_spawn_data_cmp); > > + qsort(list, ns, sizeof(*list), mlx5_dev_spawn_data_cmp); > > /* Default configuration. */ > > dev_config =3D (struct mlx5_dev_config){ > > .hw_padding =3D 0, > > @@ -1497,7 +1612,7 @@ struct mlx5_dev_spawn_data { > > .min_rxqs_num =3D MLX5_MPRQ_MIN_RXQS, > > }, > > }; > > - /* Device speicific configuration. */ > > + /* Device specific configuration. */ > > switch (pci_dev->id.device_id) { > > case PCI_DEVICE_ID_MELLANOX_CONNECTX5BF: > > dev_config.txqs_vec =3D > > MLX5_VPMD_MAX_TXQS_BLUEFIELD; @@ -1514,12 +1629,12 @@ struct > > mlx5_dev_spawn_data { > > /* Set architecture-dependent default value if unset. */ > > if (dev_config.txqs_vec =3D=3D MLX5_ARG_UNSET) > > dev_config.txqs_vec =3D MLX5_VPMD_MAX_TXQS; > > - for (i =3D 0; i !=3D n; ++i) { > > + for (i =3D 0; i !=3D ns; ++i) { > > uint32_t restore; > > > > list[i].eth_dev =3D mlx5_dev_spawn(&pci_dev->device, > > - list[i].ibv_dev, dev_config, > > - &list[i].info); > > + &list[i], > > + dev_config); > > if (!list[i].eth_dev) { > > if (rte_errno !=3D EBUSY && rte_errno !=3D EEXIST) > > break; > > @@ -1532,16 +1647,7 @@ struct mlx5_dev_spawn_data { > > list[i].eth_dev->data->dev_flags |=3D restore; > > rte_eth_dev_probing_finish(list[i].eth_dev); > > } > > - mlx5_glue->free_device_list(ibv_list); > > - if (!n) { > > - DRV_LOG(WARNING, > > - "no Verbs device matches PCI device " PCI_PRI_FMT > > "," > > - " are kernel drivers loaded?", > > - pci_dev->addr.domain, pci_dev->addr.bus, > > - pci_dev->addr.devid, pci_dev->addr.function); > > - rte_errno =3D ENOENT; > > - ret =3D -rte_errno; > > - } else if (i !=3D n) { > > + if (i !=3D ns) { > > DRV_LOG(ERR, > > "probe of PCI device " PCI_PRI_FMT " aborted after" > > " encountering an error: %s", > > @@ -1563,6 +1669,18 @@ struct mlx5_dev_spawn_data { > > } else { > > ret =3D 0; > > } > > +exit: > > + /* > > + * Do the routine cleanup: > > + * - close opened Netlink sockets > > + * - free the Infiniband device list > > + */ > > + if (nl_rdma >=3D 0) > > + close(nl_rdma); > > + if (nl_route >=3D 0) > > + close(nl_route); > > + assert(ibv_list); > > + mlx5_glue->free_device_list(ibv_list); > > return ret; > > } > > > > -- > > 1.8.3.1