From: Edward Cree <ecree@solarflare.com>
To: <linux-net-drivers@solarflare.com>, <netdev@vger.kernel.org>
Cc: <davem@davemloft.net>
Subject: [RFC PATCH v2 net-next 12/12] net: listify jited Generic XDP processing on x86_64
Date: Tue, 26 Jun 2018 19:22:27 +0100
Message-ID: <5f1bed11-146b-3924-415e-a1e3fc471da5@solarflare.com>
In-Reply-To: <fa3d7e58-e7b6-ad0c-619f-824c25ed0d97@solarflare.com>

When JITing an eBPF program on x86_64, also JIT a list_func that calls the
bpf_func in a loop.  Since the call to bpf_func is a direct call, this
should perform better than making an indirect call per packet.
Since getting at the percpu 'redirect_info' variable is ugly and magic,
pass a pointer to it into the list_func as a parameter instead of
recalculating it on every loop iteration.  This is safe because BPF
execution is not preemptible, so the percpu variable can't move while
we're using it.

Signed-off-by: Edward Cree <ecree@solarflare.com>
---
 arch/x86/net/bpf_jit_comp.c | 166 ++++++++++++++++++++++++++++++++++++++++++++
 include/linux/filter.h      |  19 +++--
 kernel/bpf/core.c           |  18 +++--
 net/core/dev.c              |   5 +-
 4 files changed, 197 insertions(+), 11 deletions(-)

diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 2580cd2e98b1..3e06dd79adda 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -1060,6 +1060,171 @@ struct x64_jit_data {
 	struct jit_context ctx;
 };
 
+static void try_do_jit_list(struct bpf_prog *bpf_prog,
+			    unsigned int (*bpf_func)(const void *ctx,
+						     const struct bpf_insn *insn))
+{
+	/* list_func takes three arguments:
+	 * struct list_head *list [RDI]
+	 * const struct bpf_insn *insn [RSI]
+	 * const struct redirect_info *percpu_ri [RDX]
+	 *
+	 * Layout of struct bpf_work on x86_64:
+	 * struct list_head {
+	 *	struct list_head *next; // 0x0
+	 *	struct list_head *prev; // 0x8
+	 * } list; // 0x0
+	 * void *ctx; // 0x10
+	 * struct redirect_info {
+	 *	u32 ifindex; // 0x18
+	 *	u32 flags; // 0x1c
+	 *	struct bpf_map *map; // 0x20
+	 *	struct bpf_map *map_to_flush; // 0x28
+	 *	unsigned long   map_owner; // 0x30
+	 * } ri; // 0x18
+	 * unsigned long ret; // 0x38
+	 * (total size 0x40 bytes)
+	 *
+	 * Desired function body:
+	 * struct redirect_info *ri = percpu_ri; [R12]
+	 * struct bpf_work *work; [RBP]
+	 *
+	 * list_for_each_entry(work, list, list) {
+	 *	work->ret = (*bpf_func)(work->ctx, insn);
+	 *	work->ri = *ri;
+	 * }
+	 *
+	 * Assembly to emit:
+	 * ; save CSRs
+	 *	push %rbx
+	 *	push %rbp
+	 *	push %r12
+	 * ; stash pointer to redirect_info
+	 *	mov %rdx,%r12	; ri = percpu_ri
+	 * ; start list
+	 *	mov %rdi,%rbp	; head = list
+	 * next:		; while (true) {
+	 *	mov (%rbp),%rbx	; rbx = head->next
+	 *	cmp %rbx,%rdi	; if (rbx == list)
+	 *	je out		;	break
+	 *	mov %rbx,%rbp	; head = rbx
+	 *	push %rdi	; save list
+	 *	push %rsi	; save insn (is still arg2)
+	 * ; struct bpf_work *work = head (container_of, but list is first member)
+	 *	mov 0x10(%rbp),%rdi; arg1 = work->ctx
+	 *	callq bpf_func  ; rax = (*bpf_func)(work->ctx, insn)
+	 *	mov %rax,0x38(%rbp); work->ret = rax
+	 * ; work->ri = *ri
+	 *	mov (%r12),%rdx
+	 *	mov %rdx,0x18(%rbp)
+	 *	mov 0x8(%r12),%rdx
+	 *	mov %rdx,0x20(%rbp)
+	 *	mov 0x10(%r12),%rdx
+	 *	mov %rdx,0x28(%rbp)
+	 *	mov 0x18(%r12),%rdx
+	 *	mov %rdx,0x30(%rbp)
+	 *	pop %rsi	; restore insn
+	 *	pop %rdi	; restore list
+	 *	jmp next	; }
+	 * out:
+	 * ; restore CSRs and return
+	 *	pop %r12
+	 *	pop %rbp
+	 *	pop %rbx
+	 *	retq
+	 */
+	u8 *image, *prog, *from_to_out, *next;
+	struct bpf_binary_header *header;
+	int off, cnt = 0;
+	s64 jmp_offset;
+
+	/* Prog should be 81 bytes; let's round up to 128 */
+	header = bpf_jit_binary_alloc(128, &image, 1, jit_fill_hole);
+	if (!header)
+		return;
+	prog = image;
+
+	/* push rbx */
+	EMIT1(0x53);
+	/* push rbp */
+	EMIT1(0x55);
+	/* push %r12 */
+	EMIT2(0x41, 0x54);
+	/* mov %rdx,%r12 */
+	EMIT3(0x49, 0x89, 0xd4);
+	/* mov %rdi,%rbp */
+	EMIT3(0x48, 0x89, 0xfd);
+	next = prog;
+	/* mov 0x0(%rbp),%rbx */
+	EMIT4(0x48, 0x8b, 0x5d, 0x00);
+	/* cmp %rbx,%rdi */
+	EMIT3(0x48, 0x39, 0xdf);
+	/* je out */
+	EMIT2(X86_JE, 0);
+	from_to_out = prog; /* record . to patch this jump later */
+	/* mov %rbx,%rbp */
+	EMIT3(0x48, 0x89, 0xdd);
+	/* push %rdi */
+	EMIT1(0x57);
+	/* push %rsi */
+	EMIT1(0x56);
+	/* mov 0x10(%rbp),%rdi */
+	EMIT4(0x48, 0x8b, 0x7d, 0x10);
+	/* e8 callq bpf_func */
+	jmp_offset = (u8 *)bpf_func - (prog + 5);
+	if (!is_simm32(jmp_offset)) {
+		pr_err("call out of range to BPF func %p from list image %p\n",
+		       bpf_func, image);
+		goto fail;
+	}
+	EMIT1_off32(0xE8, jmp_offset);
+	/* mov %rax,0x38(%rbp) */
+	EMIT4(0x48, 0x89, 0x45, 0x38);
+	/* mov (%r12),%rdx */
+	EMIT4(0x49, 0x8b, 0x14, 0x24);
+	/* mov %rdx,0x18(%rbp) */
+	EMIT4(0x48, 0x89, 0x55, 0x18);
+	for (off = 0x8; off < 0x20; off += 0x8) {
+		/* mov off(%r12),%rdx */
+		EMIT4(0x49, 0x8b, 0x54, 0x24);
+		EMIT1(off);
+		/* mov %rdx,0x18+off(%rbp) */
+		EMIT4(0x48, 0x89, 0x55, 0x18 + off);
+	}
+	/* pop %rsi */
+	EMIT1(0x5e);
+	/* pop %rdi */
+	EMIT1(0x5f);
+	/* jmp next */
+	jmp_offset = next - (prog + 2);
+	if (WARN_ON(!is_imm8(jmp_offset))) /* can't happen */
+		goto fail;
+	EMIT2(0xeb, jmp_offset);
+	/* out: */
+	jmp_offset = prog - from_to_out;
+	if (WARN_ON(!is_imm8(jmp_offset))) /* can't happen */
+		goto fail;
+	from_to_out[-1] = jmp_offset;
+	/* pop %r12 */
+	EMIT2(0x41, 0x5c);
+	/* pop %rbp */
+	EMIT1(0x5d);
+	/* pop %rbx */
+	EMIT1(0x5b);
+	/* retq */
+	EMIT1(0xc3);
+	/* If we were wrong about how much space we needed, scream and shout */
+	WARN_ON(cnt != 81);
+	if (bpf_jit_enable > 1)
+		bpf_jit_dump(0, cnt, 0, image);
+	bpf_jit_binary_lock_ro(header);
+	bpf_prog->list_func = (void *)image;
+	bpf_prog->jited_list = 1;
+	return;
+fail:
+	bpf_jit_binary_free(header);
+}
+
 struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
 {
 	struct bpf_binary_header *header = NULL;
@@ -1176,6 +1341,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
 		prog->bpf_func = (void *)image;
 		prog->jited = 1;
 		prog->jited_len = proglen;
+		try_do_jit_list(prog, prog->bpf_func);
 	} else {
 		prog = orig_prog;
 	}
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 7d813034e286..ad1e75bf0991 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -517,7 +517,8 @@ struct bpf_prog {
 					    const struct bpf_insn *insn);
 	/* Takes a list of struct bpf_work */
 	void			(*list_func)(struct list_head *list,
-					     const struct bpf_insn *insn);
+					     const struct bpf_insn *insn,
+					     const struct redirect_info *percpu_ri);
 	/* Instructions for interpreter */
 	union {
 		struct sock_filter	insns[0];
@@ -532,7 +533,7 @@ struct sk_filter {
 };
 
 #define BPF_PROG_RUN(filter, ctx)  (*(filter)->bpf_func)(ctx, (filter)->insnsi)
-#define BPF_LIST_PROG_RUN(filter, list) (*(filter)->list_func)(list, (filter)->insnsi)
+#define BPF_LIST_PROG_RUN(filter, list, percpu) (*(filter)->list_func)(list, (filter)->insnsi, percpu)
 
 #define BPF_SKB_CB_LEN QDISC_CB_PRIV_LEN
 
@@ -638,10 +639,11 @@ static __always_inline u32 bpf_prog_run_xdp(const struct bpf_prog *prog,
 }
 
 static __always_inline void bpf_list_prog_run_xdp(const struct bpf_prog *prog,
-						  struct list_head *list)
+						  struct list_head *list,
+						  const struct redirect_info *percpu_ri)
 {
 	/* Caller must hold rcu_read_lock(), as per bpf_prog_run_xdp(). */
-	BPF_LIST_PROG_RUN(prog, list);
+	BPF_LIST_PROG_RUN(prog, list, percpu_ri);
 }
 
 static inline u32 bpf_prog_insn_size(const struct bpf_prog *prog)
@@ -756,6 +758,15 @@ bpf_jit_binary_hdr(const struct bpf_prog *fp)
 	return (void *)addr;
 }
 
+static inline struct bpf_binary_header *
+bpf_list_jit_binary_hdr(const struct bpf_prog *fp)
+{
+	unsigned long real_start = (unsigned long)fp->list_func;
+	unsigned long addr = real_start & PAGE_MASK;
+
+	return (void *)addr;
+}
+
 #ifdef CONFIG_ARCH_HAS_SET_MEMORY
 static inline int bpf_prog_check_pages_ro_single(const struct bpf_prog *fp)
 {
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index c35da826cc3b..028be88c4af8 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -621,15 +621,22 @@ void bpf_jit_binary_free(struct bpf_binary_header *hdr)
  */
 void __weak bpf_jit_free(struct bpf_prog *fp)
 {
-	if (fp->jited) {
-		struct bpf_binary_header *hdr = bpf_jit_binary_hdr(fp);
+	struct bpf_binary_header *hdr;
 
+	if (fp->jited) {
+		hdr = bpf_jit_binary_hdr(fp);
 		bpf_jit_binary_unlock_ro(hdr);
 		bpf_jit_binary_free(hdr);
 
 		WARN_ON_ONCE(!bpf_prog_kallsyms_verify_off(fp));
 	}
 
+	if (fp->jited_list) {
+		hdr = bpf_list_jit_binary_hdr(fp);
+		bpf_jit_binary_unlock_ro(hdr);
+		bpf_jit_binary_free(hdr);
+	}
+
 	bpf_prog_unlock_free(fp);
 }
 
@@ -1358,13 +1365,13 @@ static u64 PROG_NAME_ARGS(stack_size)(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5, \
 
 #define LIST_PROG_NAME(stack_size) __bpf_list_prog_run##stack_size
 #define DEFINE_BPF_LIST_PROG_RUN(stack_size) \
-static void LIST_PROG_NAME(stack_size)(struct list_head *list, const struct bpf_insn *insn) \
+static void LIST_PROG_NAME(stack_size)(struct list_head *list, const struct bpf_insn *insn, const struct redirect_info *percpu_ri) \
 { \
 	struct bpf_work *work; \
 \
 	list_for_each_entry(work, list, list) { \
 		work->ret = PROG_NAME(stack_size)(work->ctx, insn); \
-		work->ri = *this_cpu_ptr(&redirect_info); \
+		work->ri = *percpu_ri; \
 	} \
 }
 
@@ -1398,7 +1405,8 @@ EVAL4(PROG_NAME_LIST, 416, 448, 480, 512)
 #undef PROG_NAME_LIST
 #define PROG_NAME_LIST(stack_size) LIST_PROG_NAME(stack_size),
 static void (*list_interpreters[])(struct list_head *list,
-				   const struct bpf_insn *insn) = {
+				   const struct bpf_insn *insn,
+				   const struct redirect_info *percpu_ri) = {
 EVAL6(PROG_NAME_LIST, 32, 64, 96, 128, 160, 192)
 EVAL6(PROG_NAME_LIST, 224, 256, 288, 320, 352, 384)
 EVAL4(PROG_NAME_LIST, 416, 448, 480, 512)
diff --git a/net/core/dev.c b/net/core/dev.c
index 746112c22afd..7c1879045ef8 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4214,6 +4214,7 @@ static void do_xdp_list_generic(struct bpf_prog *xdp_prog,
 				struct sk_buff_head *list,
 				struct sk_buff_head *pass_list)
 {
+	const struct redirect_info *percpu_ri = this_cpu_ptr(&redirect_info);
 	struct xdp_work (*xwa)[NAPI_POLL_WEIGHT], *xw;
 	struct bpf_work *bw;
 	struct sk_buff *skb;
@@ -4249,11 +4250,11 @@ static void do_xdp_list_generic(struct bpf_prog *xdp_prog,
 
 	if (xdp_prog->list_func && (xdp_prog->jited_list ||
 				    !xdp_prog->jited))
-		bpf_list_prog_run_xdp(xdp_prog, &xdp_list);
+		bpf_list_prog_run_xdp(xdp_prog, &xdp_list, percpu_ri);
 	else
 		list_for_each_entry(bw, &xdp_list, list) {
 			bw->ret = bpf_prog_run_xdp(xdp_prog, bw->ctx);
-			bw->ri = *this_cpu_ptr(&redirect_info);
+			bw->ri = *percpu_ri;
 		}
 
 	for (i = 0; i < n; i++) {

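A note on the forward-jump handling above, since it is easy to miss: the
'je out' is emitted with a zero rel8 placeholder and backpatched through
from_to_out[-1] once the position of 'out' is known.  rel8 displacements
are measured from the end of the jump instruction, which is why the
position just *after* the je is recorded.  A minimal standalone sketch of
the same trick (userspace C with a hypothetical buffer — not code from
this patch):

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		uint8_t buf[64], *prog = buf, *from_to_out;

		*prog++ = 0x74; *prog++ = 0x00;	/* je out (placeholder rel8) */
		from_to_out = prog;		/* position just past the je */

		for (int i = 0; i < 5; i++)	/* stand-in for the loop body */
			*prog++ = 0x90;		/* nop */

		/* out: patch the displacement now that the target is known */
		from_to_out[-1] = (uint8_t)(prog - from_to_out);

		printf("patched rel8 = %d\n", from_to_out[-1]);	/* prints 5 */
		return 0;
	}

For anyone checking the WARN_ON(cnt != 81): the emitted sequence works out
to 10 bytes of prologue, 66 bytes of loop and 5 bytes of epilogue, so the
expected total of 81 bytes is correct.
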
Thread overview: 21+ messages
2018-06-26 18:15 [RFC PATCH v2 net-next 00/12] Handle multiple received packets at each stage Edward Cree
2018-06-26 18:17 ` [RFC PATCH v2 net-next 01/12] net: core: trivial netif_receive_skb_list() entry point Edward Cree
2018-06-27  0:06   ` Eric Dumazet
2018-06-27 14:03     ` Edward Cree
2018-06-26 18:17 ` [RFC PATCH v2 net-next 02/12] sfc: batch up RX delivery Edward Cree
2018-06-26 18:18 ` [RFC PATCH v2 net-next 03/12] net: core: unwrap skb list receive slightly further Edward Cree
2018-06-26 18:18 ` [RFC PATCH v2 net-next 04/12] net: core: Another step of skb receive list processing Edward Cree
2018-06-26 18:19 ` [RFC PATCH v2 net-next 05/12] net: core: another layer of lists, around PF_MEMALLOC skb handling Edward Cree
2018-06-26 18:19 ` [RFC PATCH v2 net-next 06/12] net: core: propagate SKB lists through packet_type lookup Edward Cree
2018-06-27 14:36   ` Willem de Bruijn
2018-06-27 14:49     ` Edward Cree
2018-06-27 16:00       ` Willem de Bruijn
2018-06-27 16:34         ` Edward Cree
2018-06-26 18:20 ` [RFC PATCH v2 net-next 07/12] net: ipv4: listified version of ip_rcv Edward Cree
2018-06-27 12:32   ` Florian Westphal
2018-06-26 18:20 ` [RFC PATCH v2 net-next 08/12] net: ipv4: listify ip_rcv_finish Edward Cree
2018-06-26 18:21 ` [RFC PATCH v2 net-next 09/12] net: don't bother calling list RX functions on empty lists Edward Cree
2018-06-26 18:21 ` [RFC PATCH v2 net-next 10/12] net: listify Generic XDP processing, part 1 Edward Cree
2018-06-26 18:22 ` [RFC PATCH v2 net-next 11/12] net: listify Generic XDP processing, part 2 Edward Cree
2018-06-26 18:22 ` Edward Cree [this message]
2018-06-26 20:48 ` [RFC PATCH v2 net-next 00/12] Handle multiple received packets at each stage Tom Herbert
