From mboxrd@z Thu Jan 1 00:00:00 1970 From: Jeff Guo Subject: [PATCH v2 1/2] eal: add uevent api for hot plug Date: Wed, 28 Jun 2017 19:07:23 +0800 Message-ID: <1498648044-57541-1-git-send-email-jia.guo@intel.com> References: <1495986280-26207-1-git-send-email-jia.guo@intel.com> Mime-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cc: dev@dpdk.org, jia.guo@intel.com To: helin.zhang@intel.com, jingjing.wu@intel.com Return-path: Received: from mga09.intel.com (mga09.intel.com [134.134.136.24]) by dpdk.org (Postfix) with ESMTP id 5D9492C8 for ; Wed, 28 Jun 2017 13:11:25 +0200 (CEST) In-Reply-To: <1495986280-26207-1-git-send-email-jia.guo@intel.com> List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" From: "Guo, Jia" This patch adds a variable "uevent_fd" to the structure "rte_intr_handle" to enable kernel object uevent monitoring, and adds uevent APIs to the rte eal interrupt code, namely “rte_uevent_connect” and “rte_uevent_get”, so that all drivers can use these APIs to monitor and read out uevents and then handle them accordingly, such as by detaching or attaching the device. Signed-off-by: Guo, Jia --- v2->v1: remove the global hotplug_fd variables and add uevent_fd in rte_intr_handle so that each pci device maintains its own fd, fixing the dual device fd issue. fix some typos.
--- lib/librte_eal/common/eal_common_pci_uio.c | 6 +- lib/librte_eal/linuxapp/eal/eal_interrupts.c | 143 ++++++++++++++++++++- lib/librte_eal/linuxapp/eal/eal_pci_uio.c | 12 ++ .../linuxapp/eal/include/exec-env/rte_interrupts.h | 34 +++++ 4 files changed, 192 insertions(+), 3 deletions(-) diff --git a/lib/librte_eal/common/eal_common_pci_uio.c b/lib/librte_eal/common/eal_common_pci_uio.c index 367a681..5b62f70 100644 --- a/lib/librte_eal/common/eal_common_pci_uio.c +++ b/lib/librte_eal/common/eal_common_pci_uio.c @@ -117,6 +117,7 @@ dev->intr_handle.fd = -1; dev->intr_handle.uio_cfg_fd = -1; + dev->intr_handle.uevent_fd = -1; dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN; /* secondary processes - use already recorded details */ @@ -227,7 +228,10 @@ close(dev->intr_handle.uio_cfg_fd); dev->intr_handle.uio_cfg_fd = -1; } - + if (dev->intr_handle.uevent_fd >= 0) { + close(dev->intr_handle.uevent_fd); + dev->intr_handle.uevent_fd = -1; + } dev->intr_handle.fd = -1; dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN; } diff --git a/lib/librte_eal/linuxapp/eal/eal_interrupts.c b/lib/librte_eal/linuxapp/eal/eal_interrupts.c index 2e3bd12..d596522 100644 --- a/lib/librte_eal/linuxapp/eal/eal_interrupts.c +++ b/lib/librte_eal/linuxapp/eal/eal_interrupts.c @@ -65,6 +65,10 @@ #include #include +#include +#include +#include + #include "eal_private.h" #include "eal_vfio.h" #include "eal_thread.h" @@ -74,6 +78,9 @@ static RTE_DEFINE_PER_LCORE(int, _epfd) = -1; /**< epoll fd per thread */ +#define RTE_UEVENT_MSG_LEN 4096 +#define RTE_UEVENT_SUBSYSTEM_UIO 1 + /** * union for pipe fds. 
*/ @@ -669,10 +676,13 @@ struct rte_intr_source { RTE_SET_USED(r); return -1; } + rte_spinlock_lock(&intr_lock); TAILQ_FOREACH(src, &intr_sources, next) - if (src->intr_handle.fd == - events[n].data.fd) + if ((src->intr_handle.fd == + events[n].data.fd) || + (src->intr_handle.uevent_fd == + events[n].data.fd)) break; if (src == NULL){ rte_spinlock_unlock(&intr_lock); @@ -858,7 +868,24 @@ static __attribute__((noreturn)) void * } else numfds++; + + /** + * add device uevent file descriptor + * into wait list for uevent monitoring. + */ + ev.events = EPOLLIN | EPOLLPRI | EPOLLRDHUP | EPOLLHUP; + ev.data.fd = src->intr_handle.uevent_fd; + if (epoll_ctl(pfd, EPOLL_CTL_ADD, + src->intr_handle.uevent_fd, &ev) < 0){ + rte_panic("Error adding uevent_fd %d epoll_ctl" + ", %s\n", + src->intr_handle.uevent_fd, + strerror(errno)); + } else + numfds++; } + + rte_spinlock_unlock(&intr_lock); /* serve the interrupt */ eal_intr_handle_interrupts(pfd, numfds); @@ -1255,3 +1282,115 @@ static __attribute__((noreturn)) void * return 0; } + +int +rte_uevent_connect(void) +{ + struct sockaddr_nl addr; + int ret; + int netlink_fd = -1; + int size = 64 * 1024; + int nonblock = 1; + memset(&addr, 0, sizeof(addr)); + addr.nl_family = AF_NETLINK; + addr.nl_pid = 0; + addr.nl_groups = 0xffffffff; + + netlink_fd = socket(PF_NETLINK, SOCK_DGRAM, NETLINK_KOBJECT_UEVENT); + if (netlink_fd < 0) + return -1; + + RTE_LOG(ERR, EAL, + "netlink_fd is %d\n", netlink_fd); + + setsockopt(netlink_fd, SOL_SOCKET, SO_RCVBUFFORCE, &size, sizeof(size)); + + ret = ioctl(netlink_fd, FIONBIO, &nonblock); + if (ret != 0) { + RTE_LOG(ERR, EAL, + "ioctl(FIONBIO) failed\n"); + close(netlink_fd); + return -1; + } + + if (bind(netlink_fd, (struct sockaddr *) &addr, sizeof(addr)) < 0) { + close(netlink_fd); + return -1; + } + + return netlink_fd; +} + +static int +parse_event(const char *buf, struct rte_uevent *event) +{ + char action[RTE_UEVENT_MSG_LEN]; + char subsystem[RTE_UEVENT_MSG_LEN]; + char 
dev_path[RTE_UEVENT_MSG_LEN]; + int i = 0; + + memset(action, 0, RTE_UEVENT_MSG_LEN); + memset(subsystem, 0, RTE_UEVENT_MSG_LEN); + memset(dev_path, 0, RTE_UEVENT_MSG_LEN); + + while (*buf && i < RTE_UEVENT_MSG_LEN) { + if (!strncmp(buf, "ACTION=", 7)) { + buf += 7; + snprintf(action, sizeof(action), "%s", buf); + } else if (!strncmp(buf, "DEVPATH=", 8)) { + buf += 8; + snprintf(dev_path, sizeof(dev_path), "%s", buf); + } else if (!strncmp(buf, "SUBSYSTEM=", 10)) { + buf += 10; + snprintf(subsystem, sizeof(subsystem), "%s", buf); + } + while (*buf++) + i++; + while (*buf == '\0') { + buf++; + i++; + } + } + + if (!strncmp(subsystem, "uio", 3)) { + + event->subsystem = RTE_UEVENT_SUBSYSTEM_UIO; + if (!strncmp(action, "add", 3)) + event->action = RTE_UEVENT_ADD; + if (!strncmp(action, "remove", 6)) + event->action = RTE_UEVENT_REMOVE; + return 0; + } + + return -1; +} + +int +rte_uevent_get(int fd, struct rte_uevent *uevent) +{ + int ret; + char buf[RTE_UEVENT_MSG_LEN]; + + memset(uevent, 0, sizeof(struct rte_uevent)); + memset(buf, 0, RTE_UEVENT_MSG_LEN); + + ret = recv(fd, buf, RTE_UEVENT_MSG_LEN - 1, MSG_DONTWAIT); + if (ret > 0) + return parse_event(buf, uevent); + + if (ret < 0) { + if (errno == EAGAIN || errno == EWOULDBLOCK) { + return 0; + } else { + RTE_LOG(ERR, EAL, + "Socket read error(%d): %s\n", + errno, strerror(errno)); + } + } + + /* connection closed */ + if (ret == 0) + return -1; + + return 0; +} diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_uio.c b/lib/librte_eal/linuxapp/eal/eal_pci_uio.c index fa10329..2fead82 100644 --- a/lib/librte_eal/linuxapp/eal/eal_pci_uio.c +++ b/lib/librte_eal/linuxapp/eal/eal_pci_uio.c @@ -231,6 +231,10 @@ close(dev->intr_handle.uio_cfg_fd); dev->intr_handle.uio_cfg_fd = -1; } + if (dev->intr_handle.uevent_fd >= 0) { + close(dev->intr_handle.uevent_fd); + dev->intr_handle.uevent_fd = -1; + } if (dev->intr_handle.fd >= 0) { close(dev->intr_handle.fd); dev->intr_handle.fd = -1; @@ -245,6 +249,7 @@ char 
dirname[PATH_MAX]; char cfgname[PATH_MAX]; char devname[PATH_MAX]; /* contains the /dev/uioX */ + char uevtname[PATH_MAX]; int uio_num; struct rte_pci_addr *loc; @@ -276,6 +281,13 @@ goto error; } + dev->intr_handle.uevent_fd = rte_uevent_connect(); + if (dev->intr_handle.uevent_fd < 0) { + RTE_LOG(ERR, EAL, "Cannot open %s: %s\n", + uevtname, strerror(errno)); + goto error; + } + if (dev->kdrv == RTE_KDRV_IGB_UIO) dev->intr_handle.type = RTE_INTR_HANDLE_UIO; else { diff --git a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h index 6daffeb..bd1780d 100644 --- a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h +++ b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h @@ -90,6 +90,7 @@ struct rte_intr_handle { for uio_pci_generic */ }; int fd; /**< interrupt event file descriptor */ + int uevent_fd; /**< uevent file descriptor */ enum rte_intr_handle_type type; /**< handle type */ uint32_t max_intr; /**< max interrupt requested */ uint32_t nb_efd; /**< number of available efd(event fd) */ @@ -99,6 +100,16 @@ struct rte_intr_handle { int *intr_vec; /**< intr vector number array */ }; +enum rte_uevent_action { + RTE_UEVENT_ADD = 0, /**< uevent type of device add */ + RTE_UEVENT_REMOVE = 1, /**< uevent type of device remove*/ +}; + +struct rte_uevent { + enum rte_uevent_action action; /**< uevent action type */ + int subsystem; /**< subsystem id */ +}; + #define RTE_EPOLL_PER_THREAD -1 /**< to hint using per thread epfd */ /** @@ -236,4 +247,27 @@ struct rte_intr_handle { int rte_intr_cap_multiple(struct rte_intr_handle *intr_handle); +/** + * It read out the uevent from the specific file descriptor. + * + * @param fd + * The fd which the uevent associated to + * @param uevent + * Pointer to the uevent which read from the monitoring fd. + * @return + * - On success, zero. + * - On failure, a negative value. 
+ */ +int +rte_uevent_get(int fd, struct rte_uevent *uevent); + +/** + * Connect to the device uevent file descriptor. + * @return + * - On success, the connected uevent fd. + * - On failure, a negative value. + */ +int +rte_uevent_connect(void); + #endif /* _RTE_LINUXAPP_INTERRUPTS_H_ */ -- 1.8.3.1