From: Qi Zhang <qi.z.zhang@intel.com>
To: dev@dpdk.org
Cc: magnus.karlsson@intel.com, bjorn.topel@intel.com,
Qi Zhang <qi.z.zhang@intel.com>
Subject: [RFC v2 6/7] net/af_xdp: load BPF file
Date: Thu, 8 Mar 2018 21:52:48 +0800 [thread overview]
Message-ID: <20180308135249.28187-7-qi.z.zhang@intel.com> (raw)
In-Reply-To: <20180308135249.28187-1-qi.z.zhang@intel.com>
Add libbpf and libelf dependency in Makefile.
Durring initialization, a bpf prog which call imm "xdpsk_redirect"
will be loaded. Then the driver will always try to link XDP fd with
DRV mode first, then SKB mode if failed in previoius. Link will be
released during dev_close.
Note: this is workaround solution, af_xdp may remove BPF dependency
in future.
Signed-off-by: Qi Zhang <qi.z.zhang@intel.com>
---
drivers/net/af_xdp/Makefile | 5 +-
drivers/net/af_xdp/bpf_load.c | 168 ++++++++++++++++++++++++++++++++++++
drivers/net/af_xdp/bpf_load.h | 11 +++
drivers/net/af_xdp/rte_eth_af_xdp.c | 80 ++++++++++++++---
mk/rte.app.mk | 2 +-
5 files changed, 254 insertions(+), 12 deletions(-)
create mode 100644 drivers/net/af_xdp/bpf_load.c
create mode 100644 drivers/net/af_xdp/bpf_load.h
diff --git a/drivers/net/af_xdp/Makefile b/drivers/net/af_xdp/Makefile
index 990073655..f16b5306b 100644
--- a/drivers/net/af_xdp/Makefile
+++ b/drivers/net/af_xdp/Makefile
@@ -12,7 +12,9 @@ EXPORT_MAP := rte_pmd_af_xdp_version.map
LIBABIVER := 1
-CFLAGS += -O3 -I/opt/af_xdp/linux_headers/include
+LINUX_HEADER_DIR := /opt/af_xdp/linux_headers/include
+
+CFLAGS += -O3 -I$(LINUX_HEADER_DIR)
CFLAGS += $(WERROR_FLAGS)
LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring
LDLIBS += -lrte_ethdev -lrte_net -lrte_kvargs
@@ -22,5 +24,6 @@ LDLIBS += -lrte_bus_vdev
# all source are stored in SRCS-y
#
SRCS-$(CONFIG_RTE_LIBRTE_PMD_AF_XDP) += rte_eth_af_xdp.c
+SRCS-$(CONFIG_RTE_LIBRTE_PMD_AF_XDP) += bpf_load.c
include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/drivers/net/af_xdp/bpf_load.c b/drivers/net/af_xdp/bpf_load.c
new file mode 100644
index 000000000..255e67187
--- /dev/null
+++ b/drivers/net/af_xdp/bpf_load.c
@@ -0,0 +1,168 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <libelf.h>
+#include <gelf.h>
+#include <errno.h>
+#include <unistd.h>
+#include <string.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <linux/bpf.h>
+#include <linux/filter.h>
+#include <linux/netlink.h>
+#include <linux/rtnetlink.h>
+#include <linux/types.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/syscall.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <poll.h>
+#include <ctype.h>
+#include <assert.h>
+#include "bpf_load.h"
+
+static char bpf_log_buf[BPF_LOG_BUF_SIZE];
+
+struct bpf_insn prog[] = {
+ {
+ .code = 0x85, //call imm
+ .dst_reg = 0,
+ .src_reg = 0,
+ .off = 0,
+ .imm = BPF_FUNC_xdpsk_redirect,
+ },
+ {
+ .code = 0x95, //exit
+ .dst_reg = 0,
+ .src_reg = 0,
+ .off = 0,
+ .imm = 0,
+ },
+};
+
+int load_bpf_file(void)
+{
+ int fd;
+
+ fd = bpf_load_program(BPF_PROG_TYPE_XDP, prog,
+ ARRAY_SIZE(prog),
+ "GPL", 0,
+ bpf_log_buf, BPF_LOG_BUF_SIZE);
+
+ if (fd < 0) {
+ printf("bpf_load_program() err=%d\n%s", errno, bpf_log_buf);
+ return -1;
+ }
+
+ return fd;
+}
+
+int set_link_xdp_fd(int ifindex, int fd, __u32 flags)
+{
+ struct sockaddr_nl sa;
+ int sock, len, ret = -1;
+ uint32_t seq = 0;
+ char buf[4096];
+ struct nlattr *nla, *nla_xdp;
+ struct {
+ struct nlmsghdr nh;
+ struct ifinfomsg ifinfo;
+ char attrbuf[64];
+ } req;
+ struct nlmsghdr *nh;
+ struct nlmsgerr *err;
+
+ memset(&sa, 0, sizeof(sa));
+ sa.nl_family = AF_NETLINK;
+
+ sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
+ if (sock < 0) {
+ printf("open netlink socket: %s\n", strerror(errno));
+ return -1;
+ }
+
+ if (bind(sock, (struct sockaddr *)&sa, sizeof(sa)) < 0) {
+ printf("bind to netlink: %s\n", strerror(errno));
+ goto cleanup;
+ }
+
+ memset(&req, 0, sizeof(req));
+ req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg));
+ req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
+ req.nh.nlmsg_type = RTM_SETLINK;
+ req.nh.nlmsg_pid = 0;
+ req.nh.nlmsg_seq = ++seq;
+ req.ifinfo.ifi_family = AF_UNSPEC;
+ req.ifinfo.ifi_index = ifindex;
+
+ /* started nested attribute for XDP */
+ nla = (struct nlattr *)(((char *)&req)
+ + NLMSG_ALIGN(req.nh.nlmsg_len));
+ nla->nla_type = NLA_F_NESTED | 43/*IFLA_XDP*/;
+ nla->nla_len = NLA_HDRLEN;
+
+ /* add XDP fd */
+ nla_xdp = (struct nlattr *)((char *)nla + nla->nla_len);
+ nla_xdp->nla_type = 1/*IFLA_XDP_FD*/;
+ nla_xdp->nla_len = NLA_HDRLEN + sizeof(int);
+ memcpy((char *)nla_xdp + NLA_HDRLEN, &fd, sizeof(fd));
+ nla->nla_len += nla_xdp->nla_len;
+
+ /* if user passed in any flags, add those too */
+ if (flags) {
+ nla_xdp = (struct nlattr *)((char *)nla + nla->nla_len);
+ nla_xdp->nla_type = 3/*IFLA_XDP_FLAGS*/;
+ nla_xdp->nla_len = NLA_HDRLEN + sizeof(flags);
+ memcpy((char *)nla_xdp + NLA_HDRLEN, &flags, sizeof(flags));
+ nla->nla_len += nla_xdp->nla_len;
+ }
+
+ req.nh.nlmsg_len += NLA_ALIGN(nla->nla_len);
+
+ if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) {
+ printf("send to netlink: %s\n", strerror(errno));
+ goto cleanup;
+ }
+
+ len = recv(sock, buf, sizeof(buf), 0);
+ if (len < 0) {
+ printf("recv from netlink: %s\n", strerror(errno));
+ goto cleanup;
+ }
+
+ for (nh = (struct nlmsghdr *)buf; NLMSG_OK(nh, (unsigned int)len);
+ nh = NLMSG_NEXT(nh, len)) {
+ if (nh->nlmsg_pid != (uint32_t)getpid()) {
+ printf("Wrong pid %d, expected %d\n",
+ nh->nlmsg_pid, getpid());
+ goto cleanup;
+ }
+ if (nh->nlmsg_seq != seq) {
+ printf("Wrong seq %d, expected %d\n",
+ nh->nlmsg_seq, seq);
+ goto cleanup;
+ }
+ switch (nh->nlmsg_type) {
+ case NLMSG_ERROR:
+ err = (struct nlmsgerr *)NLMSG_DATA(nh);
+ if (!err->error)
+ continue;
+ printf("nlmsg error %s\n", strerror(-err->error));
+ goto cleanup;
+ case NLMSG_DONE:
+ break;
+ }
+ }
+
+ ret = 0;
+
+cleanup:
+ close(sock);
+ return ret;
+}
diff --git a/drivers/net/af_xdp/bpf_load.h b/drivers/net/af_xdp/bpf_load.h
new file mode 100644
index 000000000..2561ede55
--- /dev/null
+++ b/drivers/net/af_xdp/bpf_load.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+#ifndef __BPF_LOAD_H
+#define __BPF_LOAD_H
+
+#include <bpf/bpf.h>
+
+int load_bpf_file(void);
+int set_link_xdp_fd(int ifindex, int fd, __u32 flags);
+#endif
diff --git a/drivers/net/af_xdp/rte_eth_af_xdp.c b/drivers/net/af_xdp/rte_eth_af_xdp.c
index 7e839f0da..825273c11 100644
--- a/drivers/net/af_xdp/rte_eth_af_xdp.c
+++ b/drivers/net/af_xdp/rte_eth_af_xdp.c
@@ -11,6 +11,7 @@
#include <linux/if_ether.h>
#include <linux/if_xdp.h>
+#include <linux/if_link.h>
#include <arpa/inet.h>
#include <net/if.h>
#include <sys/types.h>
@@ -20,6 +21,7 @@
#include <unistd.h>
#include <poll.h>
#include "xdpsock_queue.h"
+#include "bpf_load.h"
#ifndef SOL_XDP
#define SOL_XDP 283
@@ -81,6 +83,9 @@ struct pmd_internals {
uint16_t port_id;
uint16_t queue_idx;
int ring_size;
+
+ uint32_t xdp_flags;
+ int bpf_fd;
};
static const char * const valid_arguments[] = {
@@ -97,6 +102,39 @@ static struct rte_eth_link pmd_link = {
.link_autoneg = ETH_LINK_AUTONEG
};
+static int load_bpf(struct pmd_internals *internals)
+{
+ /* need fix: hard coded bpf file */
+ int fd = load_bpf_file();
+
+ if (fd < 0)
+ return -1;
+
+ internals->bpf_fd = fd;
+ return 0;
+}
+
+static int link_bpf_file(struct pmd_internals *internals)
+{
+ if (!set_link_xdp_fd(internals->if_index,
+ internals->bpf_fd,
+ XDP_FLAGS_DRV_MODE))
+ internals->xdp_flags = XDP_FLAGS_DRV_MODE;
+ else if (!set_link_xdp_fd(internals->if_index,
+ internals->bpf_fd,
+ XDP_FLAGS_SKB_MODE))
+ internals->xdp_flags = XDP_FLAGS_SKB_MODE;
+ else
+ return -1;
+
+ return 0;
+}
+
+static void unlink_bpf_file(struct pmd_internals *internals)
+{
+ set_link_xdp_fd(internals->if_index, -1, internals->xdp_flags);
+}
+
static void *get_pkt_data(struct pmd_internals *internals,
uint32_t index,
uint32_t offset)
@@ -380,8 +418,26 @@ eth_stats_reset(struct rte_eth_dev *dev)
}
static void
-eth_dev_close(struct rte_eth_dev *dev __rte_unused)
+eth_dev_close(struct rte_eth_dev *dev)
{
+ struct pmd_internals *internals = dev->data->dev_private;
+
+ if (internals->xdp_flags) {
+ unlink_bpf_file(internals);
+ internals->xdp_flags = 0;
+ }
+
+ if (internals->umem) {
+ if (internals->umem->mb_pool && !internals->share_mb_pool)
+ rte_mempool_free(internals->umem->mb_pool);
+ free(internals->umem);
+ internals->umem = NULL;
+ }
+
+ if (internals->sfd != -1) {
+ close(internals->sfd);
+ internals->sfd = -1;
+ }
}
static void
@@ -743,9 +799,17 @@ init_internals(struct rte_vdev_device *dev,
if (ret)
goto error_3;
+ if (load_bpf(internals)) {
+ printf("load bpf file failed\n");
+ goto error_3;
+ }
+
+ if (link_bpf_file(internals))
+ goto error_3;
+
eth_dev = rte_eth_vdev_allocate(dev, 0);
if (!eth_dev)
- goto error_3;
+ goto error_4;
rte_memcpy(data, eth_dev->data, sizeof(*data));
internals->port_id = eth_dev->data->port_id;
@@ -763,6 +827,9 @@ init_internals(struct rte_vdev_device *dev,
return 0;
+error_4:
+ unlink_bpf_file(internals);
+
error_3:
close(internals->sfd);
@@ -808,7 +875,6 @@ static int
rte_pmd_af_xdp_remove(struct rte_vdev_device *dev)
{
struct rte_eth_dev *eth_dev = NULL;
- struct pmd_internals *internals;
RTE_LOG(INFO, PMD, "Closing AF_XDP ethdev on numa socket %u\n",
rte_socket_id());
@@ -821,15 +887,9 @@ rte_pmd_af_xdp_remove(struct rte_vdev_device *dev)
if (!eth_dev)
return -1;
- internals = eth_dev->data->dev_private;
- if (internals->umem) {
- if (internals->umem->mb_pool && !internals->share_mb_pool)
- rte_mempool_free(internals->umem->mb_pool);
- rte_free(internals->umem);
- }
+ eth_dev_close(eth_dev);
rte_free(eth_dev->data->dev_private);
rte_free(eth_dev->data);
- close(internals->sfd);
rte_eth_dev_release_port(eth_dev);
diff --git a/mk/rte.app.mk b/mk/rte.app.mk
index bc26e1457..d05e6c0e4 100644
--- a/mk/rte.app.mk
+++ b/mk/rte.app.mk
@@ -120,7 +120,7 @@ ifeq ($(CONFIG_RTE_BUILD_SHARED_LIB),n)
_LDLIBS-$(CONFIG_RTE_DRIVER_MEMPOOL_STACK) += -lrte_mempool_stack
_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_AF_PACKET) += -lrte_pmd_af_packet
-_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_AF_XDP) += -lrte_pmd_af_xdp
+_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_AF_XDP) += -lrte_pmd_af_xdp -lelf -lbpf
_LDLIBS-$(CONFIG_RTE_LIBRTE_ARK_PMD) += -lrte_pmd_ark
_LDLIBS-$(CONFIG_RTE_LIBRTE_AVF_PMD) += -lrte_pmd_avf
_LDLIBS-$(CONFIG_RTE_LIBRTE_AVP_PMD) += -lrte_pmd_avp
--
2.13.6
next prev parent reply other threads:[~2018-03-08 13:52 UTC|newest]
Thread overview: 11+ messages / expand[flat|nested] mbox.gz Atom feed top
2018-03-08 13:52 [RFC v2 0/7] PMD driver for AF_XDP Qi Zhang
2018-03-08 13:52 ` [RFC v2 1/7] net/af_xdp: new PMD driver Qi Zhang
2018-03-08 13:52 ` [RFC v2 2/7] lib/mbuf: enable parse flags when create mempool Qi Zhang
2018-03-08 13:52 ` [RFC v2 3/7] lib/mempool: allow page size aligned mempool Qi Zhang
2018-03-08 13:52 ` [RFC v2 4/7] net/af_xdp: use mbuf mempool for buffer management Qi Zhang
2018-03-08 13:52 ` [RFC v2 5/7] net/af_xdp: enable share mempool Qi Zhang
2018-03-08 13:52 ` Qi Zhang [this message]
2018-03-08 14:20 ` [RFC v2 6/7] net/af_xdp: load BPF file Zhang, Qi Z
2018-03-08 23:15 ` Stephen Hemminger
2018-05-09 7:02 ` Björn Töpel
2018-03-08 13:52 ` [RFC v2 7/7] app/testpmd: enable parameter for mempool flags Qi Zhang
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20180308135249.28187-7-qi.z.zhang@intel.com \
--to=qi.z.zhang@intel.com \
--cc=bjorn.topel@intel.com \
--cc=dev@dpdk.org \
--cc=magnus.karlsson@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.