From: Qi Zhang <qi.z.zhang@intel.com>
To: dev@dpdk.org
Cc: magnus.karlsson@intel.com, bjorn.topel@intel.com,
	Qi Zhang <qi.z.zhang@intel.com>
Subject: [RFC v2 6/7] net/af_xdp: load BPF file
Date: Thu,  8 Mar 2018 21:52:48 +0800	[thread overview]
Message-ID: <20180308135249.28187-7-qi.z.zhang@intel.com> (raw)
In-Reply-To: <20180308135249.28187-1-qi.z.zhang@intel.com>

Add the libbpf and libelf dependencies to the Makefile.
During initialization, a BPF program that calls the "xdpsk_redirect"
helper via a call instruction is loaded. The driver then always tries
to link the XDP fd in DRV mode first, falling back to SKB mode if that
fails. The link is released during dev_close.

Note: this is a workaround; AF_XDP may remove the BPF dependency in
the future.
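
For reference, the two hand-assembled instructions in bpf_load.c below
are the bytecode for a trivial XDP program along the lines of this
sketch (the bpf_xdpsk_redirect() helper exists only in the AF_XDP
kernel RFC series, not in upstream headers, so the declaration here is
illustrative):

    #include <linux/bpf.h>

    /* helper id taken from the AF_XDP kernel RFC series (not upstream) */
    static int (*bpf_xdpsk_redirect)(void) =
            (void *)BPF_FUNC_xdpsk_redirect;

    /* redirect every packet on the attached queue to the XDP socket */
    int xdp_sock_prog(struct xdp_md *ctx)
    {
            return bpf_xdpsk_redirect();
    }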

Signed-off-by: Qi Zhang <qi.z.zhang@intel.com>
---
 drivers/net/af_xdp/Makefile         |   5 +-
 drivers/net/af_xdp/bpf_load.c       | 168 ++++++++++++++++++++++++++++++++++++
 drivers/net/af_xdp/bpf_load.h       |  11 +++
 drivers/net/af_xdp/rte_eth_af_xdp.c |  80 ++++++++++++++---
 mk/rte.app.mk                       |   2 +-
 5 files changed, 254 insertions(+), 12 deletions(-)
 create mode 100644 drivers/net/af_xdp/bpf_load.c
 create mode 100644 drivers/net/af_xdp/bpf_load.h

diff --git a/drivers/net/af_xdp/Makefile b/drivers/net/af_xdp/Makefile
index 990073655..f16b5306b 100644
--- a/drivers/net/af_xdp/Makefile
+++ b/drivers/net/af_xdp/Makefile
@@ -12,7 +12,9 @@ EXPORT_MAP := rte_pmd_af_xdp_version.map
 
 LIBABIVER := 1
 
-CFLAGS += -O3 -I/opt/af_xdp/linux_headers/include
+LINUX_HEADER_DIR := /opt/af_xdp/linux_headers/include
+
+CFLAGS += -O3 -I$(LINUX_HEADER_DIR)
 CFLAGS += $(WERROR_FLAGS)
 LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring
 LDLIBS += -lrte_ethdev -lrte_net -lrte_kvargs
@@ -22,5 +24,6 @@ LDLIBS += -lrte_bus_vdev
 # all source are stored in SRCS-y
 #
 SRCS-$(CONFIG_RTE_LIBRTE_PMD_AF_XDP) += rte_eth_af_xdp.c
+SRCS-$(CONFIG_RTE_LIBRTE_PMD_AF_XDP) += bpf_load.c
 
 include $(RTE_SDK)/mk/rte.lib.mk
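
Note: LINUX_HEADER_DIR above is a plain make variable, so a build
against headers installed elsewhere can override it on the make
command line (the path below is only an example):

    make LINUX_HEADER_DIR=/usr/src/linux/usr/include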
diff --git a/drivers/net/af_xdp/bpf_load.c b/drivers/net/af_xdp/bpf_load.c
new file mode 100644
index 000000000..255e67187
--- /dev/null
+++ b/drivers/net/af_xdp/bpf_load.c
@@ -0,0 +1,168 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <libelf.h>
+#include <gelf.h>
+#include <errno.h>
+#include <unistd.h>
+#include <string.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <linux/bpf.h>
+#include <linux/filter.h>
+#include <linux/netlink.h>
+#include <linux/rtnetlink.h>
+#include <linux/types.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/syscall.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <poll.h>
+#include <ctype.h>
+#include <assert.h>
+#include "bpf_load.h"
+
+static char bpf_log_buf[BPF_LOG_BUF_SIZE];
+
+struct bpf_insn prog[] = {
+	{
+		.code = 0x85, /* BPF_JMP | BPF_CALL: call helper selected by imm */
+		.dst_reg = 0,
+		.src_reg = 0,
+		.off = 0,
+		.imm = BPF_FUNC_xdpsk_redirect,
+	},
+	{
+		.code = 0x95, /* BPF_JMP | BPF_EXIT: return r0 */
+		.dst_reg = 0,
+		.src_reg = 0,
+		.off = 0,
+		.imm = 0,
+	},
+};
+
+int load_bpf_file(void)
+{
+	int fd;
+
+	fd = bpf_load_program(BPF_PROG_TYPE_XDP, prog,
+			      ARRAY_SIZE(prog),
+			      "GPL", 0,
+			      bpf_log_buf, BPF_LOG_BUF_SIZE);
+
+	if (fd < 0) {
+		printf("bpf_load_program() err=%d\n%s", errno, bpf_log_buf);
+		return -1;
+	}
+
+	return fd;
+}
+
+int set_link_xdp_fd(int ifindex, int fd, __u32 flags)
+{
+	struct sockaddr_nl sa;
+	int sock, len, ret = -1;
+	uint32_t seq = 0;
+	char buf[4096];
+	struct nlattr *nla, *nla_xdp;
+	struct {
+		struct nlmsghdr  nh;
+		struct ifinfomsg ifinfo;
+		char             attrbuf[64];
+	} req;
+	struct nlmsghdr *nh;
+	struct nlmsgerr *err;
+
+	memset(&sa, 0, sizeof(sa));
+	sa.nl_family = AF_NETLINK;
+
+	sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
+	if (sock < 0) {
+		printf("open netlink socket: %s\n", strerror(errno));
+		return -1;
+	}
+
+	if (bind(sock, (struct sockaddr *)&sa, sizeof(sa)) < 0) {
+		printf("bind to netlink: %s\n", strerror(errno));
+		goto cleanup;
+	}
+
+	memset(&req, 0, sizeof(req));
+	req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg));
+	req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
+	req.nh.nlmsg_type = RTM_SETLINK;
+	req.nh.nlmsg_pid = 0;
+	req.nh.nlmsg_seq = ++seq;
+	req.ifinfo.ifi_family = AF_UNSPEC;
+	req.ifinfo.ifi_index = ifindex;
+
+	/* start nested attribute for XDP */
+	nla = (struct nlattr *)(((char *)&req)
+				+ NLMSG_ALIGN(req.nh.nlmsg_len));
+	nla->nla_type = NLA_F_NESTED | 43/*IFLA_XDP*/;
+	nla->nla_len = NLA_HDRLEN;
+
+	/* add XDP fd */
+	nla_xdp = (struct nlattr *)((char *)nla + nla->nla_len);
+	nla_xdp->nla_type = 1/*IFLA_XDP_FD*/;
+	nla_xdp->nla_len = NLA_HDRLEN + sizeof(int);
+	memcpy((char *)nla_xdp + NLA_HDRLEN, &fd, sizeof(fd));
+	nla->nla_len += nla_xdp->nla_len;
+
+	/* if user passed in any flags, add those too */
+	if (flags) {
+		nla_xdp = (struct nlattr *)((char *)nla + nla->nla_len);
+		nla_xdp->nla_type = 3/*IFLA_XDP_FLAGS*/;
+		nla_xdp->nla_len = NLA_HDRLEN + sizeof(flags);
+		memcpy((char *)nla_xdp + NLA_HDRLEN, &flags, sizeof(flags));
+		nla->nla_len += nla_xdp->nla_len;
+	}
+
+	req.nh.nlmsg_len += NLA_ALIGN(nla->nla_len);
+
+	if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) {
+		printf("send to netlink: %s\n", strerror(errno));
+		goto cleanup;
+	}
+
+	len = recv(sock, buf, sizeof(buf), 0);
+	if (len < 0) {
+		printf("recv from netlink: %s\n", strerror(errno));
+		goto cleanup;
+	}
+
+	for (nh = (struct nlmsghdr *)buf; NLMSG_OK(nh, (unsigned int)len);
+	     nh = NLMSG_NEXT(nh, len)) {
+		if (nh->nlmsg_pid != (uint32_t)getpid()) {
+			printf("Wrong pid %d, expected %d\n",
+			       nh->nlmsg_pid, getpid());
+			goto cleanup;
+		}
+		if (nh->nlmsg_seq != seq) {
+			printf("Wrong seq %d, expected %d\n",
+			       nh->nlmsg_seq, seq);
+			goto cleanup;
+		}
+		switch (nh->nlmsg_type) {
+		case NLMSG_ERROR:
+			err = (struct nlmsgerr *)NLMSG_DATA(nh);
+			if (!err->error)
+				continue;
+			printf("nlmsg error %s\n", strerror(-err->error));
+			goto cleanup;
+		case NLMSG_DONE:
+			break;
+		}
+	}
+
+	ret = 0;
+
+cleanup:
+	close(sock);
+	return ret;
+}
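
Taken together, the two entry points above would be used roughly as in
the sketch below ("eth0" and the helper name attach_xdp are
illustrative, not part of the patch):

    #include <net/if.h>
    #include <linux/if_link.h>
    #include "bpf_load.h"

    static int attach_xdp(const char *ifname)
    {
            int ifindex = if_nametoindex(ifname);
            int prog_fd = load_bpf_file();

            if (ifindex == 0 || prog_fd < 0)
                    return -1;
            /* prefer native driver (DRV) mode, fall back to generic SKB mode */
            if (set_link_xdp_fd(ifindex, prog_fd, XDP_FLAGS_DRV_MODE) &&
                set_link_xdp_fd(ifindex, prog_fd, XDP_FLAGS_SKB_MODE))
                    return -1;
            return 0;
    }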
diff --git a/drivers/net/af_xdp/bpf_load.h b/drivers/net/af_xdp/bpf_load.h
new file mode 100644
index 000000000..2561ede55
--- /dev/null
+++ b/drivers/net/af_xdp/bpf_load.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+#ifndef __BPF_LOAD_H
+#define __BPF_LOAD_H
+
+#include <bpf/bpf.h>
+
+int load_bpf_file(void);
+int set_link_xdp_fd(int ifindex, int fd, __u32 flags);
+#endif
diff --git a/drivers/net/af_xdp/rte_eth_af_xdp.c b/drivers/net/af_xdp/rte_eth_af_xdp.c
index 7e839f0da..825273c11 100644
--- a/drivers/net/af_xdp/rte_eth_af_xdp.c
+++ b/drivers/net/af_xdp/rte_eth_af_xdp.c
@@ -11,6 +11,7 @@
 
 #include <linux/if_ether.h>
 #include <linux/if_xdp.h>
+#include <linux/if_link.h>
 #include <arpa/inet.h>
 #include <net/if.h>
 #include <sys/types.h>
@@ -20,6 +21,7 @@
 #include <unistd.h>
 #include <poll.h>
 #include "xdpsock_queue.h"
+#include "bpf_load.h"
 
 #ifndef SOL_XDP
 #define SOL_XDP 283
@@ -81,6 +83,9 @@ struct pmd_internals {
 	uint16_t port_id;
 	uint16_t queue_idx;
 	int ring_size;
+
+	uint32_t xdp_flags;
+	int bpf_fd;
 };
 
 static const char * const valid_arguments[] = {
@@ -97,6 +102,39 @@ static struct rte_eth_link pmd_link = {
 	.link_autoneg = ETH_LINK_AUTONEG
 };
 
+static int load_bpf(struct pmd_internals *internals)
+{
+	/* TODO: the BPF program is hard-coded in prog[], not loaded from a file */
+	int fd = load_bpf_file();
+
+	if (fd < 0)
+		return -1;
+
+	internals->bpf_fd = fd;
+	return 0;
+}
+
+static int link_bpf_file(struct pmd_internals *internals)
+{
+	if (!set_link_xdp_fd(internals->if_index,
+			     internals->bpf_fd,
+			     XDP_FLAGS_DRV_MODE))
+		internals->xdp_flags = XDP_FLAGS_DRV_MODE;
+	else if (!set_link_xdp_fd(internals->if_index,
+				  internals->bpf_fd,
+				  XDP_FLAGS_SKB_MODE))
+		internals->xdp_flags = XDP_FLAGS_SKB_MODE;
+	else
+		return -1;
+
+	return 0;
+}
+
+static void unlink_bpf_file(struct pmd_internals *internals)
+{
+	set_link_xdp_fd(internals->if_index, -1, internals->xdp_flags);
+}
+
 static void *get_pkt_data(struct pmd_internals *internals,
 			  uint32_t index,
 			  uint32_t offset)
@@ -380,8 +418,26 @@ eth_stats_reset(struct rte_eth_dev *dev)
 }
 
 static void
-eth_dev_close(struct rte_eth_dev *dev __rte_unused)
+eth_dev_close(struct rte_eth_dev *dev)
 {
+	struct pmd_internals *internals = dev->data->dev_private;
+
+	if (internals->xdp_flags) {
+		unlink_bpf_file(internals);
+		internals->xdp_flags = 0;
+	}
+
+	if (internals->umem) {
+		if (internals->umem->mb_pool && !internals->share_mb_pool)
+			rte_mempool_free(internals->umem->mb_pool);
+		free(internals->umem);
+		internals->umem = NULL;
+	}
+
+	if (internals->sfd != -1) {
+		close(internals->sfd);
+		internals->sfd = -1;
+	}
 }
 
 static void
@@ -743,9 +799,17 @@ init_internals(struct rte_vdev_device *dev,
 	if (ret)
 		goto error_3;
 
+	if (load_bpf(internals)) {
+		printf("load bpf file failed\n");
+		goto error_3;
+	}
+
+	if (link_bpf_file(internals))
+		goto error_3;
+
 	eth_dev = rte_eth_vdev_allocate(dev, 0);
 	if (!eth_dev)
-		goto error_3;
+		goto error_4;
 
 	rte_memcpy(data, eth_dev->data, sizeof(*data));
 	internals->port_id = eth_dev->data->port_id;
@@ -763,6 +827,9 @@ init_internals(struct rte_vdev_device *dev,
 
 	return 0;
 
+error_4:
+	unlink_bpf_file(internals);
+
 error_3:
 	close(internals->sfd);
 
@@ -808,7 +875,6 @@ static int
 rte_pmd_af_xdp_remove(struct rte_vdev_device *dev)
 {
 	struct rte_eth_dev *eth_dev = NULL;
-	struct pmd_internals *internals;
 
 	RTE_LOG(INFO, PMD, "Closing AF_XDP ethdev on numa socket %u\n",
 		rte_socket_id());
@@ -821,15 +887,9 @@ rte_pmd_af_xdp_remove(struct rte_vdev_device *dev)
 	if (!eth_dev)
 		return -1;
 
-	internals = eth_dev->data->dev_private;
-	if (internals->umem) {
-		if (internals->umem->mb_pool && !internals->share_mb_pool)
-			rte_mempool_free(internals->umem->mb_pool);
-		rte_free(internals->umem);
-	}
+	eth_dev_close(eth_dev);
 	rte_free(eth_dev->data->dev_private);
 	rte_free(eth_dev->data);
-	close(internals->sfd);
 
 	rte_eth_dev_release_port(eth_dev);
 
diff --git a/mk/rte.app.mk b/mk/rte.app.mk
index bc26e1457..d05e6c0e4 100644
--- a/mk/rte.app.mk
+++ b/mk/rte.app.mk
@@ -120,7 +120,7 @@ ifeq ($(CONFIG_RTE_BUILD_SHARED_LIB),n)
 _LDLIBS-$(CONFIG_RTE_DRIVER_MEMPOOL_STACK)  += -lrte_mempool_stack
 
 _LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_AF_PACKET)  += -lrte_pmd_af_packet
-_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_AF_XDP)     += -lrte_pmd_af_xdp
+_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_AF_XDP)     += -lrte_pmd_af_xdp -lelf -lbpf
 _LDLIBS-$(CONFIG_RTE_LIBRTE_ARK_PMD)        += -lrte_pmd_ark
 _LDLIBS-$(CONFIG_RTE_LIBRTE_AVF_PMD)        += -lrte_pmd_avf
 _LDLIBS-$(CONFIG_RTE_LIBRTE_AVP_PMD)        += -lrte_pmd_avp
-- 
2.13.6


Thread overview: 11+ messages
2018-03-08 13:52 [RFC v2 0/7] PMD driver for AF_XDP Qi Zhang
2018-03-08 13:52 ` [RFC v2 1/7] net/af_xdp: new PMD driver Qi Zhang
2018-03-08 13:52 ` [RFC v2 2/7] lib/mbuf: enable parse flags when create mempool Qi Zhang
2018-03-08 13:52 ` [RFC v2 3/7] lib/mempool: allow page size aligned mempool Qi Zhang
2018-03-08 13:52 ` [RFC v2 4/7] net/af_xdp: use mbuf mempool for buffer management Qi Zhang
2018-03-08 13:52 ` [RFC v2 5/7] net/af_xdp: enable share mempool Qi Zhang
2018-03-08 13:52 ` Qi Zhang [this message]
2018-03-08 14:20   ` [RFC v2 6/7] net/af_xdp: load BPF file Zhang, Qi Z
2018-03-08 23:15   ` Stephen Hemminger
2018-05-09  7:02     ` Björn Töpel
2018-03-08 13:52 ` [RFC v2 7/7] app/testpmd: enable parameter for mempool flags Qi Zhang
