All of lore.kernel.org
 help / color / mirror / Atom feed
From: Alexei Starovoitov <ast@kernel.org>
To: <davem@davemloft.net>
Cc: <daniel@iogearbox.net>, <torvalds@linux-foundation.org>,
	<gregkh@linuxfoundation.org>, <luto@amacapital.net>,
	<netdev@vger.kernel.org>, <linux-kernel@vger.kernel.org>,
	<kernel-team@fb.com>
Subject: [PATCH RFC v2 net-next 4/4] bpfilter: rough bpfilter codegen example hack
Date: Wed, 2 May 2018 21:36:04 -0700	[thread overview]
Message-ID: <20180503043604.1604587-5-ast@kernel.org> (raw)
In-Reply-To: <20180503043604.1604587-1-ast@kernel.org>

From: Daniel Borkmann <daniel@iogearbox.net>

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 net/bpfilter/Makefile       |   2 +-
 net/bpfilter/bpfilter_mod.h | 285 ++++++++++++++++++++++++++++++++++++++++++-
 net/bpfilter/ctor.c         |  57 +++++----
 net/bpfilter/gen.c          | 290 ++++++++++++++++++++++++++++++++++++++++++++
 net/bpfilter/init.c         |  11 +-
 net/bpfilter/main.c         |  15 ++-
 net/bpfilter/sockopt.c      | 137 ++++++++++++++++-----
 net/bpfilter/tables.c       |   5 +-
 net/bpfilter/tgts.c         |   1 +
 9 files changed, 737 insertions(+), 66 deletions(-)
 create mode 100644 net/bpfilter/gen.c

diff --git a/net/bpfilter/Makefile b/net/bpfilter/Makefile
index bec6181de995..3796651c76cb 100644
--- a/net/bpfilter/Makefile
+++ b/net/bpfilter/Makefile
@@ -4,7 +4,7 @@
 #
 
 hostprogs-y := bpfilter_umh
-bpfilter_umh-objs := main.o tgts.o targets.o tables.o init.o ctor.o sockopt.o
+bpfilter_umh-objs := main.o tgts.o targets.o tables.o init.o ctor.o sockopt.o gen.o
 HOSTCFLAGS += -I. -Itools/include/
 
 # a bit of elf magic to convert bpfilter_umh binary into a binary blob
diff --git a/net/bpfilter/bpfilter_mod.h b/net/bpfilter/bpfilter_mod.h
index f0de41b20793..b4209985efff 100644
--- a/net/bpfilter/bpfilter_mod.h
+++ b/net/bpfilter/bpfilter_mod.h
@@ -21,8 +21,8 @@ struct bpfilter_table_info {
 	unsigned int		initial_entries;
 	unsigned int		hook_entry[BPFILTER_INET_HOOK_MAX];
 	unsigned int		underflow[BPFILTER_INET_HOOK_MAX];
-	unsigned int		stacksize;
-	void			***jumpstack;
+//	unsigned int		stacksize;
+//	void			***jumpstack;
 	unsigned char		entries[0] __aligned(8);
 };
 
@@ -64,22 +64,55 @@ struct bpfilter_ipt_error {
 
 struct bpfilter_target {
 	struct list_head	all_target_list;
-	const char		name[BPFILTER_EXTENSION_MAXNAMELEN];
+	char			name[BPFILTER_EXTENSION_MAXNAMELEN];
 	unsigned int		size;
 	int			hold;
 	u16			family;
 	u8			rev;
 };
 
+struct bpfilter_gen_ctx {
+	struct bpf_insn		*img;
+	u32			len_cur;
+	u32			len_max;
+	u32			default_verdict;
+	int			fd;
+	int			ifindex;
+	bool			offloaded;
+};
+
+union bpf_attr;
+int sys_bpf(int cmd, union bpf_attr *attr, unsigned int size);
+
+int bpfilter_gen_init(struct bpfilter_gen_ctx *ctx);
+int bpfilter_gen_prologue(struct bpfilter_gen_ctx *ctx);
+int bpfilter_gen_epilogue(struct bpfilter_gen_ctx *ctx);
+int bpfilter_gen_append(struct bpfilter_gen_ctx *ctx,
+			struct bpfilter_ipt_ip *ent, int verdict);
+int bpfilter_gen_commit(struct bpfilter_gen_ctx *ctx);
+void bpfilter_gen_destroy(struct bpfilter_gen_ctx *ctx);
+
 struct bpfilter_target *bpfilter_target_get_by_name(const char *name);
 void bpfilter_target_put(struct bpfilter_target *tgt);
 int bpfilter_target_add(struct bpfilter_target *tgt);
 
-struct bpfilter_table_info *bpfilter_ipv4_table_ctor(struct bpfilter_table *tbl);
+struct bpfilter_table_info *
+bpfilter_ipv4_table_alloc(struct bpfilter_table *tbl, __u32 size_ents);
+struct bpfilter_table_info *
+bpfilter_ipv4_table_finalize(struct bpfilter_table *tbl,
+			     struct bpfilter_table_info *info,
+			     __u32 size_ents, __u32 num_ents);
+struct bpfilter_table_info *
+bpfilter_ipv4_table_finalize2(struct bpfilter_table *tbl,
+			      struct bpfilter_table_info *info,
+			      __u32 size_ents, __u32 num_ents);
+
 int bpfilter_ipv4_register_targets(void);
 void bpfilter_tables_init(void);
 int bpfilter_get_info(void *addr, int len);
 int bpfilter_get_entries(void *cmd, int len);
+int bpfilter_set_replace(void *cmd, int len);
+int bpfilter_set_add_counters(void *cmd, int len);
 int bpfilter_ipv4_init(void);
 
 int copy_from_user(void *dst, void *addr, int len);
@@ -93,4 +126,248 @@ extern int pid;
 extern int debug_fd;
 #define ENOTSUPP        524
 
+/* Helper macros for filter block array initializers. */
+
+/* ALU ops on registers, bpf_add|sub|...: dst_reg += src_reg */
+
+#define BPF_ALU64_REG(OP, DST, SRC)				\
+	((struct bpf_insn) {					\
+		.code  = BPF_ALU64 | BPF_OP(OP) | BPF_X,	\
+		.dst_reg = DST,					\
+		.src_reg = SRC,					\
+		.off   = 0,					\
+		.imm   = 0 })
+
+#define BPF_ALU32_REG(OP, DST, SRC)				\
+	((struct bpf_insn) {					\
+		.code  = BPF_ALU | BPF_OP(OP) | BPF_X,		\
+		.dst_reg = DST,					\
+		.src_reg = SRC,					\
+		.off   = 0,					\
+		.imm   = 0 })
+
+/* ALU ops on immediates, bpf_add|sub|...: dst_reg += imm32 */
+
+#define BPF_ALU64_IMM(OP, DST, IMM)				\
+	((struct bpf_insn) {					\
+		.code  = BPF_ALU64 | BPF_OP(OP) | BPF_K,	\
+		.dst_reg = DST,					\
+		.src_reg = 0,					\
+		.off   = 0,					\
+		.imm   = IMM })
+
+#define BPF_ALU32_IMM(OP, DST, IMM)				\
+	((struct bpf_insn) {					\
+		.code  = BPF_ALU | BPF_OP(OP) | BPF_K,		\
+		.dst_reg = DST,					\
+		.src_reg = 0,					\
+		.off   = 0,					\
+		.imm   = IMM })
+
+/* Endianess conversion, cpu_to_{l,b}e(), {l,b}e_to_cpu() */
+
+#define BPF_ENDIAN(TYPE, DST, LEN)				\
+	((struct bpf_insn) {					\
+		.code  = BPF_ALU | BPF_END | BPF_SRC(TYPE),	\
+		.dst_reg = DST,					\
+		.src_reg = 0,					\
+		.off   = 0,					\
+		.imm   = LEN })
+
+/* Short form of mov, dst_reg = src_reg */
+
+#define BPF_MOV64_REG(DST, SRC)					\
+	((struct bpf_insn) {					\
+		.code  = BPF_ALU64 | BPF_MOV | BPF_X,		\
+		.dst_reg = DST,					\
+		.src_reg = SRC,					\
+		.off   = 0,					\
+		.imm   = 0 })
+
+#define BPF_MOV32_REG(DST, SRC)					\
+	((struct bpf_insn) {					\
+		.code  = BPF_ALU | BPF_MOV | BPF_X,		\
+		.dst_reg = DST,					\
+		.src_reg = SRC,					\
+		.off   = 0,					\
+		.imm   = 0 })
+
+/* Short form of mov, dst_reg = imm32 */
+
+#define BPF_MOV64_IMM(DST, IMM)					\
+	((struct bpf_insn) {					\
+		.code  = BPF_ALU64 | BPF_MOV | BPF_K,		\
+		.dst_reg = DST,					\
+		.src_reg = 0,					\
+		.off   = 0,					\
+		.imm   = IMM })
+
+#define BPF_MOV32_IMM(DST, IMM)					\
+	((struct bpf_insn) {					\
+		.code  = BPF_ALU | BPF_MOV | BPF_K,		\
+		.dst_reg = DST,					\
+		.src_reg = 0,					\
+		.off   = 0,					\
+		.imm   = IMM })
+
+/* BPF_LD_IMM64 macro encodes single 'load 64-bit immediate' insn */
+#define BPF_LD_IMM64(DST, IMM)					\
+	BPF_LD_IMM64_RAW(DST, 0, IMM)
+
+#define BPF_LD_IMM64_RAW(DST, SRC, IMM)				\
+	((struct bpf_insn) {					\
+		.code  = BPF_LD | BPF_DW | BPF_IMM,		\
+		.dst_reg = DST,					\
+		.src_reg = SRC,					\
+		.off   = 0,					\
+		.imm   = (__u32) (IMM) }),			\
+	((struct bpf_insn) {					\
+		.code  = 0, /* zero is reserved opcode */	\
+		.dst_reg = 0,					\
+		.src_reg = 0,					\
+		.off   = 0,					\
+		.imm   = ((__u64) (IMM)) >> 32 })
+
+/* pseudo BPF_LD_IMM64 insn used to refer to process-local map_fd */
+#define BPF_LD_MAP_FD(DST, MAP_FD)				\
+	BPF_LD_IMM64_RAW(DST, BPF_PSEUDO_MAP_FD, MAP_FD)
+
+/* Short form of mov based on type, BPF_X: dst_reg = src_reg, BPF_K: dst_reg = imm32 */
+
+#define BPF_MOV64_RAW(TYPE, DST, SRC, IMM)			\
+	((struct bpf_insn) {					\
+		.code  = BPF_ALU64 | BPF_MOV | BPF_SRC(TYPE),	\
+		.dst_reg = DST,					\
+		.src_reg = SRC,					\
+		.off   = 0,					\
+		.imm   = IMM })
+
+#define BPF_MOV32_RAW(TYPE, DST, SRC, IMM)			\
+	((struct bpf_insn) {					\
+		.code  = BPF_ALU | BPF_MOV | BPF_SRC(TYPE),	\
+		.dst_reg = DST,					\
+		.src_reg = SRC,					\
+		.off   = 0,					\
+		.imm   = IMM })
+
+/* Direct packet access, R0 = *(uint *) (skb->data + imm32) */
+
+#define BPF_LD_ABS(SIZE, IMM)					\
+	((struct bpf_insn) {					\
+		.code  = BPF_LD | BPF_SIZE(SIZE) | BPF_ABS,	\
+		.dst_reg = 0,					\
+		.src_reg = 0,					\
+		.off   = 0,					\
+		.imm   = IMM })
+
+/* Indirect packet access, R0 = *(uint *) (skb->data + src_reg + imm32) */
+
+#define BPF_LD_IND(SIZE, SRC, IMM)				\
+	((struct bpf_insn) {					\
+		.code  = BPF_LD | BPF_SIZE(SIZE) | BPF_IND,	\
+		.dst_reg = 0,					\
+		.src_reg = SRC,					\
+		.off   = 0,					\
+		.imm   = IMM })
+
+/* Memory load, dst_reg = *(uint *) (src_reg + off16) */
+
+#define BPF_LDX_MEM(SIZE, DST, SRC, OFF)			\
+	((struct bpf_insn) {					\
+		.code  = BPF_LDX | BPF_SIZE(SIZE) | BPF_MEM,	\
+		.dst_reg = DST,					\
+		.src_reg = SRC,					\
+		.off   = OFF,					\
+		.imm   = 0 })
+
+/* Memory store, *(uint *) (dst_reg + off16) = src_reg */
+
+#define BPF_STX_MEM(SIZE, DST, SRC, OFF)			\
+	((struct bpf_insn) {					\
+		.code  = BPF_STX | BPF_SIZE(SIZE) | BPF_MEM,	\
+		.dst_reg = DST,					\
+		.src_reg = SRC,					\
+		.off   = OFF,					\
+		.imm   = 0 })
+
+/* Atomic memory add, *(uint *)(dst_reg + off16) += src_reg */
+
+#define BPF_STX_XADD(SIZE, DST, SRC, OFF)			\
+	((struct bpf_insn) {					\
+		.code  = BPF_STX | BPF_SIZE(SIZE) | BPF_XADD,	\
+		.dst_reg = DST,					\
+		.src_reg = SRC,					\
+		.off   = OFF,					\
+		.imm   = 0 })
+
+/* Memory store, *(uint *) (dst_reg + off16) = imm32 */
+
+#define BPF_ST_MEM(SIZE, DST, OFF, IMM)				\
+	((struct bpf_insn) {					\
+		.code  = BPF_ST | BPF_SIZE(SIZE) | BPF_MEM,	\
+		.dst_reg = DST,					\
+		.src_reg = 0,					\
+		.off   = OFF,					\
+		.imm   = IMM })
+
+/* Conditional jumps against registers, if (dst_reg 'op' src_reg) goto pc + off16 */
+
+#define BPF_JMP_REG(OP, DST, SRC, OFF)				\
+	((struct bpf_insn) {					\
+		.code  = BPF_JMP | BPF_OP(OP) | BPF_X,		\
+		.dst_reg = DST,					\
+		.src_reg = SRC,					\
+		.off   = OFF,					\
+		.imm   = 0 })
+
+/* Conditional jumps against immediates, if (dst_reg 'op' imm32) goto pc + off16 */
+
+#define BPF_JMP_IMM(OP, DST, IMM, OFF)				\
+	((struct bpf_insn) {					\
+		.code  = BPF_JMP | BPF_OP(OP) | BPF_K,		\
+		.dst_reg = DST,					\
+		.src_reg = 0,					\
+		.off   = OFF,					\
+		.imm   = IMM })
+
+/* Unconditional jumps, goto pc + off16 */
+
+#define BPF_JMP_A(OFF)						\
+	((struct bpf_insn) {					\
+		.code  = BPF_JMP | BPF_JA,			\
+		.dst_reg = 0,					\
+		.src_reg = 0,					\
+		.off   = OFF,					\
+		.imm   = 0 })
+
+/* Function call */
+
+#define BPF_EMIT_CALL(FUNC)					\
+	((struct bpf_insn) {					\
+		.code  = BPF_JMP | BPF_CALL,			\
+		.dst_reg = 0,					\
+		.src_reg = 0,					\
+		.off   = 0,					\
+		.imm   = ((FUNC) - __bpf_call_base) })
+
+/* Raw code statement block */
+
+#define BPF_RAW_INSN(CODE, DST, SRC, OFF, IMM)			\
+	((struct bpf_insn) {					\
+		.code  = CODE,					\
+		.dst_reg = DST,					\
+		.src_reg = SRC,					\
+		.off   = OFF,					\
+		.imm   = IMM })
+
+/* Program exit */
+
+#define BPF_EXIT_INSN()						\
+	((struct bpf_insn) {					\
+		.code  = BPF_JMP | BPF_EXIT,			\
+		.dst_reg = 0,					\
+		.src_reg = 0,					\
+		.off   = 0,					\
+		.imm   = 0 })
+
 #endif
diff --git a/net/bpfilter/ctor.c b/net/bpfilter/ctor.c
index efb7feef3c42..ba44c21cacfa 100644
--- a/net/bpfilter/ctor.c
+++ b/net/bpfilter/ctor.c
@@ -1,8 +1,12 @@
 // SPDX-License-Identifier: GPL-2.0
-#include <sys/socket.h>
-#include <linux/bitops.h>
 #include <stdlib.h>
 #include <stdio.h>
+#include <string.h>
+
+#include <sys/socket.h>
+
+#include <linux/bitops.h>
+
 #include "bpfilter_mod.h"
 
 unsigned int __sw_hweight32(unsigned int w)
@@ -13,35 +17,47 @@ unsigned int __sw_hweight32(unsigned int w)
 	return (w * 0x01010101) >> 24;
 }
 
-struct bpfilter_table_info *bpfilter_ipv4_table_ctor(struct bpfilter_table *tbl)
+struct bpfilter_table_info *bpfilter_ipv4_table_alloc(struct bpfilter_table *tbl,
+						      __u32 size_ents)
 {
 	unsigned int num_hooks = hweight32(tbl->valid_hooks);
-	struct bpfilter_ipt_standard *tgts;
 	struct bpfilter_table_info *info;
-	struct bpfilter_ipt_error *term;
-	unsigned int mask, offset, h, i;
 	unsigned int size, alloc_size;
 
 	size  = sizeof(struct bpfilter_ipt_standard) * num_hooks;
 	size += sizeof(struct bpfilter_ipt_error);
+	size += size_ents;
 
 	alloc_size = size + sizeof(struct bpfilter_table_info);
 
 	info = malloc(alloc_size);
-	if (!info)
-		return NULL;
+	if (info) {
+		memset(info, 0, alloc_size);
+		info->size = size;
+	}
+	return info;
+}
+
+struct bpfilter_table_info *bpfilter_ipv4_table_finalize(struct bpfilter_table *tbl,
+							 struct bpfilter_table_info *info,
+							 __u32 size_ents, __u32 num_ents)
+{
+	unsigned int num_hooks = hweight32(tbl->valid_hooks);
+	struct bpfilter_ipt_standard *tgts;
+	struct bpfilter_ipt_error *term;
+	struct bpfilter_ipt_entry *ent;
+	unsigned int mask, offset, h, i;
 
-	info->num_entries = num_hooks + 1;
-	info->size = size;
+	info->num_entries = num_ents + num_hooks + 1;
 
-	tgts = (struct bpfilter_ipt_standard *) (info + 1);
-	term = (struct bpfilter_ipt_error *) (tgts + num_hooks);
+	ent  = (struct bpfilter_ipt_entry *)(info + 1);
+	tgts = (struct bpfilter_ipt_standard *)((u8 *)ent + size_ents);
+	term = (struct bpfilter_ipt_error *)(tgts + num_hooks);
 
 	mask = tbl->valid_hooks;
 	offset = 0;
 	h = 0;
 	i = 0;
-	dprintf(debug_fd, "mask %x num_hooks %d\n", mask, num_hooks);
 	while (mask) {
 		struct bpfilter_ipt_standard *t;
 
@@ -55,7 +71,6 @@ struct bpfilter_table_info *bpfilter_ipv4_table_ctor(struct bpfilter_table *tbl)
 			BPFILTER_IPT_STANDARD_INIT(BPFILTER_NF_ACCEPT);
 		t->target.target.u.kernel.target =
 			bpfilter_target_get_by_name(t->target.target.u.user.name);
-		dprintf(debug_fd, "user.name %s\n", t->target.target.u.user.name);
 		if (!t->target.target.u.kernel.target)
 			goto out_fail;
 
@@ -67,14 +82,10 @@ struct bpfilter_table_info *bpfilter_ipv4_table_ctor(struct bpfilter_table *tbl)
 	*term = (struct bpfilter_ipt_error) BPFILTER_IPT_ERROR_INIT;
 	term->target.target.u.kernel.target =
 		bpfilter_target_get_by_name(term->target.target.u.user.name);
-	dprintf(debug_fd, "user.name %s\n", term->target.target.u.user.name);
-	if (!term->target.target.u.kernel.target)
-		goto out_fail;
-
-	dprintf(debug_fd, "info %p\n", info);
-	return info;
-
+	if (!term->target.target.u.kernel.target) {
 out_fail:
-	free(info);
-	return NULL;
+		free(info);
+		return NULL;
+	}
+	return info;
 }
diff --git a/net/bpfilter/gen.c b/net/bpfilter/gen.c
new file mode 100644
index 000000000000..8e08561b78f1
--- /dev/null
+++ b/net/bpfilter/gen.c
@@ -0,0 +1,290 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <errno.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include <linux/if_ether.h>
+#include <linux/if_link.h>
+#include <linux/rtnetlink.h>
+#include <linux/bpf.h>
+typedef __u16 __bitwise __sum16; /* hack */
+#include <linux/ip.h>
+
+#include <arpa/inet.h>
+
+#include "bpfilter_mod.h"
+
+unsigned int if_nametoindex(const char *ifname);
+
+static inline __u64 bpf_ptr_to_u64(const void *ptr)
+{
+	return (__u64)(unsigned long)ptr;
+}
+
+static int bpf_prog_load(enum bpf_prog_type type,
+			 const struct bpf_insn *insns,
+			 unsigned int insn_num,
+			 __u32 offload_ifindex)
+{
+	union bpf_attr attr = {};
+
+	attr.prog_type		= type;
+	attr.insns		= bpf_ptr_to_u64(insns);
+	attr.insn_cnt		= insn_num;
+	attr.license		= bpf_ptr_to_u64("GPL");
+	attr.prog_ifindex	= offload_ifindex;
+
+	return sys_bpf(BPF_PROG_LOAD, &attr, sizeof(attr));
+}
+
+static int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags)
+{
+	struct sockaddr_nl sa;
+	int sock, seq = 0, len, ret = -1;
+	char buf[4096];
+	struct nlattr *nla, *nla_xdp;
+	struct {
+		struct nlmsghdr  nh;
+		struct ifinfomsg ifinfo;
+		char             attrbuf[64];
+	} req;
+	struct nlmsghdr *nh;
+	struct nlmsgerr *err;
+
+	memset(&sa, 0, sizeof(sa));
+	sa.nl_family = AF_NETLINK;
+
+	sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
+	if (sock < 0) {
+		printf("open netlink socket: %s\n", strerror(errno));
+		return -1;
+	}
+
+	if (bind(sock, (struct sockaddr *)&sa, sizeof(sa)) < 0) {
+		printf("bind to netlink: %s\n", strerror(errno));
+		goto cleanup;
+	}
+
+	memset(&req, 0, sizeof(req));
+	req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg));
+	req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
+	req.nh.nlmsg_type = RTM_SETLINK;
+	req.nh.nlmsg_pid = 0;
+	req.nh.nlmsg_seq = ++seq;
+	req.ifinfo.ifi_family = AF_UNSPEC;
+	req.ifinfo.ifi_index = ifindex;
+
+	/* started nested attribute for XDP */
+	nla = (struct nlattr *)(((char *)&req)
+				+ NLMSG_ALIGN(req.nh.nlmsg_len));
+	nla->nla_type = NLA_F_NESTED | 43/*IFLA_XDP*/;
+	nla->nla_len = NLA_HDRLEN;
+
+	/* add XDP fd */
+	nla_xdp = (struct nlattr *)((char *)nla + nla->nla_len);
+	nla_xdp->nla_type = 1/*IFLA_XDP_FD*/;
+	nla_xdp->nla_len = NLA_HDRLEN + sizeof(int);
+	memcpy((char *)nla_xdp + NLA_HDRLEN, &fd, sizeof(fd));
+	nla->nla_len += nla_xdp->nla_len;
+
+	/* if user passed in any flags, add those too */
+	if (flags) {
+		nla_xdp = (struct nlattr *)((char *)nla + nla->nla_len);
+		nla_xdp->nla_type = 3/*IFLA_XDP_FLAGS*/;
+		nla_xdp->nla_len = NLA_HDRLEN + sizeof(flags);
+		memcpy((char *)nla_xdp + NLA_HDRLEN, &flags, sizeof(flags));
+		nla->nla_len += nla_xdp->nla_len;
+	}
+
+	req.nh.nlmsg_len += NLA_ALIGN(nla->nla_len);
+
+	if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) {
+		printf("send to netlink: %s\n", strerror(errno));
+		goto cleanup;
+	}
+
+	len = recv(sock, buf, sizeof(buf), 0);
+	if (len < 0) {
+		printf("recv from netlink: %s\n", strerror(errno));
+		goto cleanup;
+	}
+
+	for (nh = (struct nlmsghdr *)buf; NLMSG_OK(nh, len);
+	     nh = NLMSG_NEXT(nh, len)) {
+		if (nh->nlmsg_pid != getpid()) {
+			printf("Wrong pid %d, expected %d\n",
+			       nh->nlmsg_pid, getpid());
+			goto cleanup;
+		}
+		if (nh->nlmsg_seq != seq) {
+			printf("Wrong seq %d, expected %d\n",
+			       nh->nlmsg_seq, seq);
+			goto cleanup;
+		}
+		switch (nh->nlmsg_type) {
+		case NLMSG_ERROR:
+			err = (struct nlmsgerr *)NLMSG_DATA(nh);
+			if (!err->error)
+				continue;
+			printf("nlmsg error %s\n", strerror(-err->error));
+			goto cleanup;
+		case NLMSG_DONE:
+			break;
+		}
+	}
+
+	ret = 0;
+
+cleanup:
+	close(sock);
+	return ret;
+}
+
+static int bpfilter_load_dev(struct bpfilter_gen_ctx *ctx)
+{
+	u32 xdp_flags = 0;
+
+	if (ctx->offloaded)
+		xdp_flags |= XDP_FLAGS_HW_MODE;
+	return bpf_set_link_xdp_fd(ctx->ifindex, ctx->fd, xdp_flags);
+}
+
+int bpfilter_gen_init(struct bpfilter_gen_ctx *ctx)
+{
+	unsigned int len_max = BPF_MAXINSNS;
+
+	memset(ctx, 0, sizeof(*ctx));
+	ctx->img = calloc(len_max, sizeof(struct bpf_insn));
+	if (!ctx->img)
+		return -ENOMEM;
+	ctx->len_max = len_max;
+	ctx->fd = -1;
+	ctx->default_verdict = XDP_PASS;
+
+	return 0;
+}
+
+#define EMIT(x)						\
+	do {						\
+		if (ctx->len_cur + 1 > ctx->len_max)	\
+			return -ENOMEM;			\
+		ctx->img[ctx->len_cur++] = x;		\
+	} while (0)
+
+int bpfilter_gen_prologue(struct bpfilter_gen_ctx *ctx)
+{
+	EMIT(BPF_MOV64_REG(BPF_REG_9, BPF_REG_1));
+	EMIT(BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_9,
+			 offsetof(struct xdp_md, data)));
+	EMIT(BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_9,
+			 offsetof(struct xdp_md, data_end)));
+	EMIT(BPF_MOV64_REG(BPF_REG_1, BPF_REG_2));
+	EMIT(BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, ETH_HLEN));
+	EMIT(BPF_JMP_REG(BPF_JLE, BPF_REG_1, BPF_REG_3, 2));
+	EMIT(BPF_MOV32_IMM(BPF_REG_0, ctx->default_verdict));
+	EMIT(BPF_EXIT_INSN());
+	return 0;
+}
+
+int bpfilter_gen_epilogue(struct bpfilter_gen_ctx *ctx)
+{
+	EMIT(BPF_MOV32_IMM(BPF_REG_0, ctx->default_verdict));
+	EMIT(BPF_EXIT_INSN());
+	return 0;
+}
+
+static int bpfilter_gen_check_entry(const struct bpfilter_ipt_ip *ent)
+{
+#define M_FF	"\xff\xff\xff\xff"
+	static const __u8 mask1[IFNAMSIZ] = M_FF M_FF M_FF M_FF;
+	static const __u8 mask0[IFNAMSIZ] = { };
+	int ones = strlen(ent->in_iface); ones += ones > 0;
+#undef M_FF
+	if (strlen(ent->out_iface) > 0)
+		return -ENOTSUPP;
+	if (memcmp(ent->in_iface_mask, mask1, ones) ||
+	    memcmp(&ent->in_iface_mask[ones], mask0, sizeof(mask0) - ones))
+		return -ENOTSUPP;
+	if ((ent->src_mask != 0 && ent->src_mask != 0xffffffff) ||
+	    (ent->dst_mask != 0 && ent->dst_mask != 0xffffffff))
+		return -ENOTSUPP;
+
+	return 0;
+}
+
+int bpfilter_gen_append(struct bpfilter_gen_ctx *ctx,
+			struct bpfilter_ipt_ip *ent, int verdict)
+{
+	u32 match_xdp = verdict == -1 ? XDP_DROP : XDP_PASS;
+	int ret, ifindex, match_state = 0;
+
+	/* convention R1: tmp, R2: data, R3: data_end, R9: xdp_buff */
+	ret = bpfilter_gen_check_entry(ent);
+	if (ret < 0)
+		return ret;
+	if (ent->src_mask == 0 && ent->dst_mask == 0)
+		return 0;
+
+	ifindex = if_nametoindex(ent->in_iface);
+	if (!ifindex)
+		return 0;
+	if (ctx->ifindex && ctx->ifindex != ifindex)
+		return -ENOTSUPP;
+
+	ctx->ifindex = ifindex;
+	match_state = !!ent->src_mask + !!ent->dst_mask;
+
+	EMIT(BPF_MOV64_REG(BPF_REG_1, BPF_REG_2));
+	EMIT(BPF_MOV32_IMM(BPF_REG_5, 0));
+	EMIT(BPF_LDX_MEM(BPF_H, BPF_REG_4, BPF_REG_1,
+			 offsetof(struct ethhdr, h_proto)));
+	EMIT(BPF_JMP_IMM(BPF_JNE, BPF_REG_4, htons(ETH_P_IP),
+			 3 + match_state * 3));
+	EMIT(BPF_ALU64_IMM(BPF_ADD, BPF_REG_1,
+			   sizeof(struct ethhdr) + sizeof(struct iphdr)));
+	EMIT(BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1 + match_state * 3));
+	EMIT(BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -(int)sizeof(struct iphdr)));
+	if (ent->src_mask) {
+		EMIT(BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1,
+				 offsetof(struct iphdr, saddr)));
+		EMIT(BPF_JMP_IMM(BPF_JNE, BPF_REG_4, ent->src, 1));
+		EMIT(BPF_ALU32_IMM(BPF_ADD, BPF_REG_5, 1));
+	}
+	if (ent->dst_mask) {
+		EMIT(BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1,
+				 offsetof(struct iphdr, daddr)));
+		EMIT(BPF_JMP_IMM(BPF_JNE, BPF_REG_4, ent->dst, 1));
+		EMIT(BPF_ALU32_IMM(BPF_ADD, BPF_REG_5, 1));
+	}
+	EMIT(BPF_JMP_IMM(BPF_JNE, BPF_REG_5, match_state, 2));
+	EMIT(BPF_MOV32_IMM(BPF_REG_0, match_xdp));
+	EMIT(BPF_EXIT_INSN());
+	return 0;
+}
+
+int bpfilter_gen_commit(struct bpfilter_gen_ctx *ctx)
+{
+	int ret;
+
+	ret = bpf_prog_load(BPF_PROG_TYPE_XDP, ctx->img,
+			    ctx->len_cur, ctx->ifindex);
+	if (ret > 0)
+		ctx->offloaded = true;
+	if (ret < 0)
+		ret = bpf_prog_load(BPF_PROG_TYPE_XDP, ctx->img,
+				    ctx->len_cur, 0);
+	if (ret > 0) {
+		ctx->fd = ret;
+		ret = bpfilter_load_dev(ctx);
+	}
+
+	return ret < 0 ? ret : 0;
+}
+
+void bpfilter_gen_destroy(struct bpfilter_gen_ctx *ctx)
+{
+	free(ctx->img);
+	close(ctx->fd);
+}
diff --git a/net/bpfilter/init.c b/net/bpfilter/init.c
index 699f3f623189..14e621a03217 100644
--- a/net/bpfilter/init.c
+++ b/net/bpfilter/init.c
@@ -1,6 +1,8 @@
 // SPDX-License-Identifier: GPL-2.0
-#include <sys/socket.h>
 #include <errno.h>
+
+#include <sys/socket.h>
+
 #include "bpfilter_mod.h"
 
 static struct bpfilter_table filter_table_ipv4 = {
@@ -22,12 +24,13 @@ int bpfilter_ipv4_init(void)
 	if (err)
 		return err;
 
-	info = bpfilter_ipv4_table_ctor(t);
+	info = bpfilter_ipv4_table_alloc(t, 0);
+	if (!info)
+		return -ENOMEM;
+	info = bpfilter_ipv4_table_finalize(t, info, 0, 0);
 	if (!info)
 		return -ENOMEM;
-
 	t->info = info;
-
 	return bpfilter_table_add(&filter_table_ipv4);
 }
 
diff --git a/net/bpfilter/main.c b/net/bpfilter/main.c
index e0273ca201ad..ebd8a4fb1e95 100644
--- a/net/bpfilter/main.c
+++ b/net/bpfilter/main.c
@@ -1,20 +1,23 @@
 // SPDX-License-Identifier: GPL-2.0
 #define _GNU_SOURCE
-#include <sys/uio.h>
 #include <errno.h>
 #include <stdio.h>
-#include <sys/socket.h>
 #include <fcntl.h>
 #include <unistd.h>
-#include "include/uapi/linux/bpf.h"
+
+#include <sys/uio.h>
+#include <sys/socket.h>
+
 #include <asm/unistd.h>
+
+#include "include/uapi/linux/bpf.h"
+
 #include "bpfilter_mod.h"
 #include "msgfmt.h"
 
 extern long int syscall (long int __sysno, ...);
 
-static inline int sys_bpf(enum bpf_cmd cmd, union bpf_attr *attr,
-			  unsigned int size)
+int sys_bpf(int cmd, union bpf_attr *attr, unsigned int size)
 {
 	return syscall(321, cmd, attr, size);
 }
@@ -39,7 +42,7 @@ int copy_to_user(void *addr, const void *src, int len)
 	struct iovec local;
 	struct iovec remote;
 
-	local.iov_base = (void *) src;
+	local.iov_base = (void *)src;
 	local.iov_len = len;
 	remote.iov_base = addr;
 	remote.iov_len = len;
diff --git a/net/bpfilter/sockopt.c b/net/bpfilter/sockopt.c
index 43687daf51a3..26ad12a11736 100644
--- a/net/bpfilter/sockopt.c
+++ b/net/bpfilter/sockopt.c
@@ -1,10 +1,14 @@
 // SPDX-License-Identifier: GPL-2.0
-#include <sys/socket.h>
 #include <errno.h>
 #include <string.h>
 #include <stdio.h>
+#include <stdlib.h>
+
+#include <sys/socket.h>
+
 #include "bpfilter_mod.h"
 
+/* TODO: Get all of this in here properly done in encoding/decoding layer. */
 static int fetch_name(void *addr, int len, char *name, int name_len)
 {
 	if (copy_from_user(name, addr, name_len))
@@ -55,12 +59,17 @@ int bpfilter_get_info(void *addr, int len)
 	return err;
 }
 
-static int copy_target(struct bpfilter_standard_target *ut,
-		       struct bpfilter_standard_target *kt)
+static int target_u2k(struct bpfilter_standard_target *kt)
 {
-	struct bpfilter_target *tgt;
-	int sz;
+	kt->target.u.kernel.target =
+		bpfilter_target_get_by_name(kt->target.u.user.name);
+	return kt->target.u.kernel.target ? 0 : -EINVAL;
+}
 
+static int target_k2u(struct bpfilter_standard_target *ut,
+		      struct bpfilter_standard_target *kt)
+{
+	struct bpfilter_target *tgt;
 
 	if (put_user(kt->target.u.target_size,
 		     &ut->target.u.target_size))
@@ -69,12 +78,9 @@ static int copy_target(struct bpfilter_standard_target *ut,
 	tgt = kt->target.u.kernel.target;
 	if (copy_to_user(ut->target.u.user.name, tgt->name, strlen(tgt->name)))
 		return -EFAULT;
-
 	if (put_user(tgt->rev, &ut->target.u.user.revision))
 		return -EFAULT;
-
-	sz = tgt->size;
-	if (copy_to_user(ut->target.data, kt->target.data, sz))
+	if (copy_to_user(ut->target.data, kt->target.data, tgt->size))
 		return -EFAULT;
 
 	return 0;
@@ -84,30 +90,25 @@ static int do_get_entries(void *up,
 			  struct bpfilter_table *tbl,
 			  struct bpfilter_table_info *info)
 {
-	unsigned int total_size = info->size;
 	const struct bpfilter_ipt_entry *ent;
+	unsigned int total_size = info->size;
+	void *base = info->entries;
 	unsigned int off;
-	void *base;
-
-	base = info->entries;
 
 	for (off = 0; off < total_size; off += ent->next_offset) {
-		struct bpfilter_xt_counters *cntrs;
 		struct bpfilter_standard_target *tgt;
+		struct bpfilter_xt_counters *cntrs;
 
 		ent = base + off;
 		if (copy_to_user(up + off, ent, sizeof(*ent)))
 			return -EFAULT;
-
-		/* XXX Just clear counters for now. XXX */
+		/* XXX: Just clear counters for now. */
 		cntrs = up + off + offsetof(struct bpfilter_ipt_entry, cntrs);
 		if (put_user(0, &cntrs->packet_cnt) ||
 		    put_user(0, &cntrs->byte_cnt))
 			return -EINVAL;
-
-		tgt = (void *) ent + ent->target_offset;
-		dprintf(debug_fd, "target.verdict %d\n", tgt->verdict);
-		if (copy_target(up + off + ent->target_offset, tgt))
+		tgt = (void *)ent + ent->target_offset;
+		if (target_k2u(up + off + ent->target_offset, tgt))
 			return -EFAULT;
 	}
 	return 0;
@@ -123,31 +124,113 @@ int bpfilter_get_entries(void *cmd, int len)
 
 	if (len < sizeof(struct bpfilter_ipt_get_entries))
 		return -EINVAL;
-
 	if (copy_from_user(&req, cmd, sizeof(req)))
 		return -EFAULT;
-
 	tbl = bpfilter_table_get_by_name(req.name, strlen(req.name));
 	if (!tbl)
 		return -ENOENT;
-
 	info = tbl->info;
 	if (!info) {
 		err = -ENOENT;
 		goto out_put;
 	}
-
 	if (info->size != req.size) {
 		err = -EINVAL;
 		goto out_put;
 	}
-
 	err = do_get_entries(uptr->entries, tbl, info);
-	dprintf(debug_fd, "do_get_entries %d req.size %d\n", err, req.size);
-
 out_put:
 	bpfilter_table_put(tbl);
+	return err;
+}
 
+static int do_set_replace(struct bpfilter_ipt_replace *req, void *base,
+			  struct bpfilter_table *tbl)
+{
+	unsigned int total_size = req->size;
+	struct bpfilter_table_info *info;
+	struct bpfilter_ipt_entry *ent;
+	struct bpfilter_gen_ctx ctx;
+	unsigned int off, sents = 0, ents = 0;
+	int ret;
+
+	ret = bpfilter_gen_init(&ctx);
+	if (ret < 0)
+		return ret;
+	ret = bpfilter_gen_prologue(&ctx);
+	if (ret < 0)
+		return ret;
+	info = bpfilter_ipv4_table_alloc(tbl, total_size);
+	if (!info)
+		return -ENOMEM;
+	if (copy_from_user(&info->entries[0], base, req->size)) {
+		free(info);
+		return -EFAULT;
+	}
+	base = &info->entries[0];
+	for (off = 0; off < total_size; off += ent->next_offset) {
+		struct bpfilter_standard_target *tgt;
+		ent = base + off;
+		ents++;
+		sents += ent->next_offset;
+		tgt = (void *) ent + ent->target_offset;
+		target_u2k(tgt);
+		ret = bpfilter_gen_append(&ctx, &ent->ip, tgt->verdict);
+                if (ret < 0)
+                        goto err;
+	}
+	info->num_entries = ents;
+	info->size = sents;
+	memcpy(info->hook_entry, req->hook_entry, sizeof(info->hook_entry));
+	memcpy(info->underflow, req->underflow, sizeof(info->hook_entry));
+	ret = bpfilter_gen_epilogue(&ctx);
+	if (ret < 0)
+		goto err;
+	ret = bpfilter_gen_commit(&ctx);
+	if (ret < 0)
+		goto err;
+	free(tbl->info);
+	tbl->info = info;
+	bpfilter_gen_destroy(&ctx);
+	dprintf(debug_fd, "offloaded %u\n", ctx.offloaded);
+	return ret;
+err:
+	free(info);
+	return ret;
+}
+
+int bpfilter_set_replace(void *cmd, int len)
+{
+	struct bpfilter_ipt_replace *uptr = cmd;
+	struct bpfilter_ipt_replace req;
+	struct bpfilter_table_info *info;
+	struct bpfilter_table *tbl;
+	int err;
+
+	if (len < sizeof(req))
+		return -EINVAL;
+	if (copy_from_user(&req, cmd, sizeof(req)))
+		return -EFAULT;
+	if (req.num_counters >= INT_MAX / sizeof(struct bpfilter_xt_counters))
+		return -ENOMEM;
+	if (req.num_counters == 0)
+		return -EINVAL;
+	req.name[sizeof(req.name) - 1] = 0;
+	tbl = bpfilter_table_get_by_name(req.name, strlen(req.name));
+	if (!tbl)
+		return -ENOENT;
+	info = tbl->info;
+	if (!info) {
+		err = -ENOENT;
+		goto out_put;
+	}
+	err = do_set_replace(&req, uptr->entries, tbl);
+out_put:
+	bpfilter_table_put(tbl);
 	return err;
 }
 
+int bpfilter_set_add_counters(void *cmd, int len)
+{
+	return 0;
+}
diff --git a/net/bpfilter/tables.c b/net/bpfilter/tables.c
index 9a96599be634..e0dab283092d 100644
--- a/net/bpfilter/tables.c
+++ b/net/bpfilter/tables.c
@@ -1,8 +1,11 @@
 // SPDX-License-Identifier: GPL-2.0
-#include <sys/socket.h>
 #include <errno.h>
 #include <string.h>
+
+#include <sys/socket.h>
+
 #include <linux/hashtable.h>
+
 #include "bpfilter_mod.h"
 
 static unsigned int full_name_hash(const void *salt, const char *name, unsigned int len)
diff --git a/net/bpfilter/tgts.c b/net/bpfilter/tgts.c
index eac5e8ac0b4b..0a00bc289d3d 100644
--- a/net/bpfilter/tgts.c
+++ b/net/bpfilter/tgts.c
@@ -1,5 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <sys/socket.h>
+
 #include "bpfilter_mod.h"
 
 struct bpfilter_target std_tgt = {
-- 
2.9.5

      parent reply	other threads:[~2018-05-03  4:36 UTC|newest]

Thread overview: 35+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-05-03  4:36 [PATCH v2 net-next 0/4] bpfilter Alexei Starovoitov
2018-05-03  4:36 ` [PATCH v2 net-next 1/4] umh: introduce fork_usermode_blob() helper Alexei Starovoitov
2018-05-04 19:56   ` Luis R. Rodriguez
2018-05-04 19:56     ` Luis R. Rodriguez
2018-05-04 19:56     ` Luis R. Rodriguez
2018-05-05  1:37     ` Alexei Starovoitov
2018-05-05  1:37       ` Alexei Starovoitov
2018-05-05  1:37       ` Alexei Starovoitov
2018-05-07 18:39       ` Luis R. Rodriguez
2018-05-07 18:39         ` Luis R. Rodriguez
2018-05-07 18:39         ` Luis R. Rodriguez
2018-05-09  2:25         ` Alexei Starovoitov
2018-05-09  2:25           ` Alexei Starovoitov
2018-05-09  2:25           ` Alexei Starovoitov
2018-05-10 22:27     ` Kees Cook
2018-05-10 22:27       ` Kees Cook
2018-05-10 22:27       ` Kees Cook
2018-05-10 23:16       ` Alexei Starovoitov
2018-05-10 23:16         ` Alexei Starovoitov
2018-05-10 23:16         ` Alexei Starovoitov
2018-05-05  4:48   ` Jann Horn
2018-05-05 16:24     ` Alexei Starovoitov
2018-05-03  4:36 ` [PATCH v2 net-next 2/4] net: add skeleton of bpfilter kernel module Alexei Starovoitov
2018-05-03 14:23   ` Edward Cree
2018-05-05  1:00     ` Alexei Starovoitov
2018-05-07 15:24   ` Harald Welte
2018-05-07 15:50     ` David Miller
2018-05-07 18:51   ` Luis R. Rodriguez
2018-05-07 18:51     ` Luis R. Rodriguez
2018-05-07 18:51     ` Luis R. Rodriguez
2018-05-09  2:29     ` Alexei Starovoitov
2018-05-09  2:29       ` Alexei Starovoitov
2018-05-09  2:29       ` Alexei Starovoitov
2018-05-03  4:36 ` [PATCH RFC v2 net-next 3/4] bpfilter: add iptable get/set parsing Alexei Starovoitov
2018-05-03  4:36 ` Alexei Starovoitov [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20180503043604.1604587-5-ast@kernel.org \
    --to=ast@kernel.org \
    --cc=daniel@iogearbox.net \
    --cc=davem@davemloft.net \
    --cc=gregkh@linuxfoundation.org \
    --cc=kernel-team@fb.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=luto@amacapital.net \
    --cc=netdev@vger.kernel.org \
    --cc=torvalds@linux-foundation.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.