* [PATCH stable 4.4 0/9] BPF stable patches
@ 2018-01-30  2:37 Daniel Borkmann
  2018-01-30  2:37 ` [PATCH stable 4.4 1/9] bpf: fix branch pruning logic Daniel Borkmann
                   ` (9 more replies)
  0 siblings, 10 replies; 22+ messages in thread
From: Daniel Borkmann @ 2018-01-30  2:37 UTC (permalink / raw)
  To: gregkh; +Cc: ast, daniel, stable

All for 4.4 backported and (limited) testing.

Thanks!

Alexei Starovoitov (3):
  bpf: fix bpf_tail_call() x64 JIT
  bpf: introduce BPF_JIT_ALWAYS_ON config
  bpf: fix 32-bit divide by zero

Daniel Borkmann (4):
  bpf: fix branch pruning logic
  bpf: arsh is not supported in 32 bit alu thus reject it
  bpf: avoid false sharing of map refcount with max_entries
  bpf: reject stores into ctx via st and xadd

Eric Dumazet (2):
  x86: bpf_jit: small optimization in emit_bpf_tail_call()
  bpf: fix divides by zero

 arch/arm64/Kconfig          |  1 +
 arch/s390/Kconfig           |  1 +
 arch/x86/Kconfig            |  1 +
 arch/x86/net/bpf_jit_comp.c | 13 ++++-----
 include/linux/bpf.h         | 16 ++++++++---
 init/Kconfig                |  7 +++++
 kernel/bpf/core.c           | 30 ++++++++++++++++---
 kernel/bpf/verifier.c       | 70 +++++++++++++++++++++++++++++++++++++++++++++
 lib/test_bpf.c              | 13 +++++----
 net/Kconfig                 |  3 ++
 net/core/filter.c           |  8 +++++-
 net/core/sysctl_net_core.c  |  6 ++++
 net/socket.c                |  9 ++++++
 13 files changed, 157 insertions(+), 21 deletions(-)

-- 
2.9.5

^ permalink raw reply	[flat|nested] 22+ messages in thread

* [PATCH stable 4.4 1/9] bpf: fix branch pruning logic
  2018-01-30  2:37 [PATCH stable 4.4 0/9] BPF stable patches Daniel Borkmann
@ 2018-01-30  2:37 ` Daniel Borkmann
  2018-02-01  8:07   ` Patch "bpf: fix branch pruning logic" has been added to the 4.4-stable tree gregkh
  2018-01-30  2:37 ` [PATCH stable 4.4 2/9] x86: bpf_jit: small optimization in emit_bpf_tail_call() Daniel Borkmann
                   ` (8 subsequent siblings)
  9 siblings, 1 reply; 22+ messages in thread
From: Daniel Borkmann @ 2018-01-30  2:37 UTC (permalink / raw)
  To: gregkh; +Cc: ast, daniel, stable

From: Alexei Starovoitov <ast@fb.com>

[ Upstream commit c131187db2d3fa2f8bf32fdf4e9a4ef805168467 ]

When the verifier detects that a register contains a runtime constant
and it's compared with another constant, it will prune exploration
of the branch that is guaranteed not to be taken at runtime.
This is all correct, but a malicious program may be constructed
in such a way that it always has a constant comparison and
the other branch is never taken under any conditions.
In this case such a path through the program will not be explored
by the verifier. It won't be taken at run-time either, but since
all instructions are JITed the malicious program may cause JITs
to complain about using reserved fields, etc.
To fix the issue we have to track the instructions explored by
the verifier and sanitize instructions that are dead at run time
with NOPs. We cannot reject such dead code, since llvm generates
it for valid C code: it doesn't do as much data flow analysis
as the verifier does.
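
For illustration only (not part of the fix itself), a minimal sketch of the
program shape this addresses, written with the insn macros from
include/linux/filter.h; the dead instruction is hypothetical:

  struct bpf_insn prog[] = {
          BPF_MOV64_IMM(BPF_REG_0, 0),            /* r0 = 0, a known constant */
          BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),  /* always true, fall-through is pruned */
          BPF_RAW_INSN(0, 0, 0, 0, 0),            /* dead, never explored by the verifier;
                                                   * now rewritten to a NOP before JITing */
          BPF_EXIT_INSN(),                        /* return r0 */
  };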

Fixes: 17a5267067f3 ("bpf: verifier (add verifier core)")
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 kernel/bpf/verifier.c | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 014c2d7..a626797 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -191,6 +191,7 @@ struct bpf_insn_aux_data {
 		enum bpf_reg_type ptr_type;	/* pointer type for load/store insns */
 		struct bpf_map *map_ptr;	/* pointer for call insn into lookup_elem */
 	};
+	bool seen; /* this insn was processed by the verifier */
 };
 
 #define MAX_USED_MAPS 64 /* max number of maps accessed by one eBPF program */
@@ -1793,6 +1794,7 @@ static int do_check(struct verifier_env *env)
 			print_bpf_insn(env, insn);
 		}
 
+		env->insn_aux_data[insn_idx].seen = true;
 		if (class == BPF_ALU || class == BPF_ALU64) {
 			err = check_alu_op(env, insn);
 			if (err)
@@ -1988,6 +1990,7 @@ static int do_check(struct verifier_env *env)
 					return err;
 
 				insn_idx++;
+				env->insn_aux_data[insn_idx].seen = true;
 			} else {
 				verbose("invalid BPF_LD mode\n");
 				return -EINVAL;
@@ -2125,6 +2128,7 @@ static int adjust_insn_aux_data(struct verifier_env *env, u32 prog_len,
 				u32 off, u32 cnt)
 {
 	struct bpf_insn_aux_data *new_data, *old_data = env->insn_aux_data;
+	int i;
 
 	if (cnt == 1)
 		return 0;
@@ -2134,6 +2138,8 @@ static int adjust_insn_aux_data(struct verifier_env *env, u32 prog_len,
 	memcpy(new_data, old_data, sizeof(struct bpf_insn_aux_data) * off);
 	memcpy(new_data + off + cnt - 1, old_data + off,
 	       sizeof(struct bpf_insn_aux_data) * (prog_len - off - cnt + 1));
+	for (i = off; i < off + cnt - 1; i++)
+		new_data[i].seen = true;
 	env->insn_aux_data = new_data;
 	vfree(old_data);
 	return 0;
@@ -2152,6 +2158,25 @@ static struct bpf_prog *bpf_patch_insn_data(struct verifier_env *env, u32 off,
 	return new_prog;
 }
 
+/* The verifier does more data flow analysis than llvm and will not explore
+ * branches that are dead at run time. Malicious programs can have dead code
+ * too. Therefore replace all dead at-run-time code with nops.
+ */
+static void sanitize_dead_code(struct verifier_env *env)
+{
+	struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
+	struct bpf_insn nop = BPF_MOV64_REG(BPF_REG_0, BPF_REG_0);
+	struct bpf_insn *insn = env->prog->insnsi;
+	const int insn_cnt = env->prog->len;
+	int i;
+
+	for (i = 0; i < insn_cnt; i++) {
+		if (aux_data[i].seen)
+			continue;
+		memcpy(insn + i, &nop, sizeof(nop));
+	}
+}
+
 /* convert load instructions that access fields of 'struct __sk_buff'
  * into sequence of instructions that access fields of 'struct sk_buff'
  */
@@ -2371,6 +2396,9 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr)
 	free_states(env);
 
 	if (ret == 0)
+		sanitize_dead_code(env);
+
+	if (ret == 0)
 		/* program is valid, convert *(u32*)(ctx + off) accesses */
 		ret = convert_ctx_accesses(env);
 
-- 
2.9.5

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH stable 4.4 2/9] x86: bpf_jit: small optimization in emit_bpf_tail_call()
  2018-01-30  2:37 [PATCH stable 4.4 0/9] BPF stable patches Daniel Borkmann
  2018-01-30  2:37 ` [PATCH stable 4.4 1/9] bpf: fix branch pruning logic Daniel Borkmann
@ 2018-01-30  2:37 ` Daniel Borkmann
  2018-02-01  8:07   ` Patch "x86: bpf_jit: small optimization in emit_bpf_tail_call()" has been added to the 4.4-stable tree gregkh
  2018-01-30  2:37 ` [PATCH stable 4.4 3/9] bpf: fix bpf_tail_call() x64 JIT Daniel Borkmann
                   ` (7 subsequent siblings)
  9 siblings, 1 reply; 22+ messages in thread
From: Daniel Borkmann @ 2018-01-30  2:37 UTC (permalink / raw)
  To: gregkh; +Cc: ast, daniel, stable, Eric Dumazet, David S . Miller

From: Eric Dumazet <edumazet@google.com>

[ upstream commit 84ccac6e7854ebbfb56d2fc6d5bef9be49bb304c ]

Saves 4 bytes by replacing the following instructions:

lea rax, [rsi + rdx * 8 + offsetof(...)]
mov rax, qword ptr [rax]
cmp rax, 0

with:

mov rax, [rsi + rdx * 8 + offsetof(...)]
test rax, rax

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/x86/net/bpf_jit_comp.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 7599197..33f002e 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -269,7 +269,7 @@ static void emit_bpf_tail_call(u8 **pprog)
 	EMIT4(0x48, 0x8B, 0x46,                   /* mov rax, qword ptr [rsi + 16] */
 	      offsetof(struct bpf_array, map.max_entries));
 	EMIT3(0x48, 0x39, 0xD0);                  /* cmp rax, rdx */
-#define OFFSET1 47 /* number of bytes to jump */
+#define OFFSET1 43 /* number of bytes to jump */
 	EMIT2(X86_JBE, OFFSET1);                  /* jbe out */
 	label1 = cnt;
 
@@ -278,21 +278,20 @@ static void emit_bpf_tail_call(u8 **pprog)
 	 */
 	EMIT2_off32(0x8B, 0x85, -STACKSIZE + 36); /* mov eax, dword ptr [rbp - 516] */
 	EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT);     /* cmp eax, MAX_TAIL_CALL_CNT */
-#define OFFSET2 36
+#define OFFSET2 32
 	EMIT2(X86_JA, OFFSET2);                   /* ja out */
 	label2 = cnt;
 	EMIT3(0x83, 0xC0, 0x01);                  /* add eax, 1 */
 	EMIT2_off32(0x89, 0x85, -STACKSIZE + 36); /* mov dword ptr [rbp - 516], eax */
 
 	/* prog = array->ptrs[index]; */
-	EMIT4_off32(0x48, 0x8D, 0x84, 0xD6,       /* lea rax, [rsi + rdx * 8 + offsetof(...)] */
+	EMIT4_off32(0x48, 0x8B, 0x84, 0xD6,       /* mov rax, [rsi + rdx * 8 + offsetof(...)] */
 		    offsetof(struct bpf_array, ptrs));
-	EMIT3(0x48, 0x8B, 0x00);                  /* mov rax, qword ptr [rax] */
 
 	/* if (prog == NULL)
 	 *   goto out;
 	 */
-	EMIT4(0x48, 0x83, 0xF8, 0x00);            /* cmp rax, 0 */
+	EMIT3(0x48, 0x85, 0xC0);		  /* test rax,rax */
 #define OFFSET3 10
 	EMIT2(X86_JE, OFFSET3);                   /* je out */
 	label3 = cnt;
-- 
2.9.5

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH stable 4.4 3/9] bpf: fix bpf_tail_call() x64 JIT
  2018-01-30  2:37 [PATCH stable 4.4 0/9] BPF stable patches Daniel Borkmann
  2018-01-30  2:37 ` [PATCH stable 4.4 1/9] bpf: fix branch pruning logic Daniel Borkmann
  2018-01-30  2:37 ` [PATCH stable 4.4 2/9] x86: bpf_jit: small optimization in emit_bpf_tail_call() Daniel Borkmann
@ 2018-01-30  2:37 ` Daniel Borkmann
  2018-02-01  8:07   ` Patch "bpf: fix bpf_tail_call() x64 JIT" has been added to the 4.4-stable tree gregkh
  2018-01-30  2:37 ` [PATCH stable 4.4 4/9] bpf: introduce BPF_JIT_ALWAYS_ON config Daniel Borkmann
                   ` (6 subsequent siblings)
  9 siblings, 1 reply; 22+ messages in thread
From: Daniel Borkmann @ 2018-01-30  2:37 UTC (permalink / raw)
  To: gregkh; +Cc: ast, daniel, stable, Alexei Starovoitov, David S . Miller

From: Alexei Starovoitov <ast@fb.com>

[ upstream commit 90caccdd8cc0215705f18b92771b449b01e2474a ]

- bpf prog_array, just like all other types of bpf array, accepts a 32-bit index.
  Clarify that in the comment.
- fix x64 JIT of bpf_tail_call which was incorrectly loading 8 instead of 4 bytes
- tighten the corresponding check in the interpreter to stay consistent

The JIT bug can be triggered after the introduction of the BPF_F_NUMA_NODE flag
in commit 96eabe7a40aa in 4.14. Before that the map_flags would stay zero and
though the JIT code is wrong it will still check bounds correctly.
Hence the two Fixes tags. All other JITs don't have this problem.
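
For illustration, a hedged sketch (not actual kernel code) of the bounds-check
semantics that interpreter and x64 JIT now share, truncating the index to
32 bit before the comparison:

  static bool tail_call_index_ok(const struct bpf_array *array, u64 r3)
  {
          u32 index = r3;                         /* prog_array takes a 32-bit index */

          return index < array->map.max_entries;  /* 32-bit compare, matching the new
                                                   * "cmp dword ptr [rsi + 16], edx" */
  }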

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Fixes: 96eabe7a40aa ("bpf: Allow selecting numa node during map creation")
Fixes: b52f00e6a715 ("x86: bpf_jit: implement bpf_tail_call() helper")
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/x86/net/bpf_jit_comp.c | 4 ++--
 kernel/bpf/core.c           | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 33f002e..33c42b8 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -266,9 +266,9 @@ static void emit_bpf_tail_call(u8 **pprog)
 	/* if (index >= array->map.max_entries)
 	 *   goto out;
 	 */
-	EMIT4(0x48, 0x8B, 0x46,                   /* mov rax, qword ptr [rsi + 16] */
+	EMIT2(0x89, 0xD2);                        /* mov edx, edx */
+	EMIT3(0x39, 0x56,                         /* cmp dword ptr [rsi + 16], edx */
 	      offsetof(struct bpf_array, map.max_entries));
-	EMIT3(0x48, 0x39, 0xD0);                  /* cmp rax, rdx */
 #define OFFSET1 43 /* number of bytes to jump */
 	EMIT2(X86_JBE, OFFSET1);                  /* jbe out */
 	label1 = cnt;
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 3fd76cf..e54ea31 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -517,7 +517,7 @@ static unsigned int __bpf_prog_run(void *ctx, const struct bpf_insn *insn)
 		struct bpf_map *map = (struct bpf_map *) (unsigned long) BPF_R2;
 		struct bpf_array *array = container_of(map, struct bpf_array, map);
 		struct bpf_prog *prog;
-		u64 index = BPF_R3;
+		u32 index = BPF_R3;
 
 		if (unlikely(index >= array->map.max_entries))
 			goto out;
-- 
2.9.5

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH stable 4.4 4/9] bpf: introduce BPF_JIT_ALWAYS_ON config
  2018-01-30  2:37 [PATCH stable 4.4 0/9] BPF stable patches Daniel Borkmann
                   ` (2 preceding siblings ...)
  2018-01-30  2:37 ` [PATCH stable 4.4 3/9] bpf: fix bpf_tail_call() x64 JIT Daniel Borkmann
@ 2018-01-30  2:37 ` Daniel Borkmann
  2018-02-01  8:07   ` Patch "bpf: introduce BPF_JIT_ALWAYS_ON config" has been added to the 4.4-stable tree gregkh
  2018-01-30  2:37 ` [PATCH stable 4.4 5/9] bpf: arsh is not supported in 32 bit alu thus reject it Daniel Borkmann
                   ` (5 subsequent siblings)
  9 siblings, 1 reply; 22+ messages in thread
From: Daniel Borkmann @ 2018-01-30  2:37 UTC (permalink / raw)
  To: gregkh; +Cc: ast, daniel, stable

From: Alexei Starovoitov <ast@kernel.org>

[ upstream commit 290af86629b25ffd1ed6232c4e9107da031705cb ]

The BPF interpreter has been used as part of the Spectre variant 2 attack (CVE-2017-5715).

A quote from the Google Project Zero blog:
"At this point, it would normally be necessary to locate gadgets in
the host kernel code that can be used to actually leak data by reading
from an attacker-controlled location, shifting and masking the result
appropriately and then using the result of that as offset to an
attacker-controlled address for a load. But piecing gadgets together
and figuring out which ones work in a speculation context seems annoying.
So instead, we decided to use the eBPF interpreter, which is built into
the host kernel - while there is no legitimate way to invoke it from inside
a VM, the presence of the code in the host kernel's text section is sufficient
to make it usable for the attack, just like with ordinary ROP gadgets."

To make an attacker's job harder, introduce a BPF_JIT_ALWAYS_ON config
option that removes the interpreter from the kernel in favor of JIT-only mode.
So far the eBPF JIT is supported by:
x64, arm64, arm32, sparc64, s390, powerpc64, mips64

The start of the JITed program is randomized and the code page is marked read-only.
In addition, "constant blinding" can be turned on with net.core.bpf_jit_harden.

v2->v3:
- move __bpf_prog_ret0 under ifdef (Daniel)

v1->v2:
- fix init order, test_bpf and cBPF (Daniel's feedback)
- fix offloaded bpf (Jakub's feedback)
- add 'return 0' dummy in case something can invoke prog->bpf_func
- retarget bpf tree. For bpf-next the patch would need one extra hunk.
  It will be sent when the trees are merged back to net-next

Considered doing:
  int bpf_jit_enable __read_mostly = BPF_EBPF_JIT_DEFAULT;
but it seems better to land the patch as-is and in bpf-next remove
bpf_jit_enable global variable from all JITs, consolidate in one place
and remove this jit_init() function.
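
For callers, a hedged sketch of the resulting contract (illustrative only;
the real call sites are test_bpf.c and bpf_migrate_filter() in the hunks
below):

  err = bpf_prog_select_runtime(fp);
  if (err) {
          /* with CONFIG_BPF_JIT_ALWAYS_ON and no usable eBPF JIT,
           * err is -ENOTSUPP and the prog must not be run
           */
          goto out_err_free;
  }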

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 arch/arm64/Kconfig         |  1 +
 arch/s390/Kconfig          |  1 +
 arch/x86/Kconfig           |  1 +
 init/Kconfig               |  7 +++++++
 kernel/bpf/core.c          | 24 +++++++++++++++++++++++-
 lib/test_bpf.c             | 13 ++++++++-----
 net/Kconfig                |  3 +++
 net/core/filter.c          |  4 +++-
 net/core/sysctl_net_core.c |  6 ++++++
 net/socket.c               |  9 +++++++++
 10 files changed, 62 insertions(+), 7 deletions(-)

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 14cdc6d..83af36d 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -54,6 +54,7 @@ config ARM64
 	select HAVE_ARCH_SECCOMP_FILTER
 	select HAVE_ARCH_TRACEHOOK
 	select HAVE_BPF_JIT
+	select HAVE_EBPF_JIT
 	select HAVE_C_RECORDMCOUNT
 	select HAVE_CC_STACKPROTECTOR
 	select HAVE_CMPXCHG_DOUBLE
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 5ad7b72..2ee95ec 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -123,6 +123,7 @@ config S390
 	select HAVE_ARCH_TRACEHOOK
 	select HAVE_ARCH_TRANSPARENT_HUGEPAGE
 	select HAVE_BPF_JIT if PACK_STACK && HAVE_MARCH_Z196_FEATURES
+	select HAVE_EBPF_JIT if PACK_STACK && HAVE_MARCH_Z196_FEATURES
 	select HAVE_CMPXCHG_DOUBLE
 	select HAVE_CMPXCHG_LOCAL
 	select HAVE_DEBUG_KMEMLEAK
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 75d0053..2db9304 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -88,6 +88,7 @@ config X86
 	select HAVE_ARCH_TRACEHOOK
 	select HAVE_ARCH_TRANSPARENT_HUGEPAGE
 	select HAVE_BPF_JIT			if X86_64
+	select HAVE_EBPF_JIT			if X86_64
 	select HAVE_CC_STACKPROTECTOR
 	select HAVE_CMPXCHG_DOUBLE
 	select HAVE_CMPXCHG_LOCAL
diff --git a/init/Kconfig b/init/Kconfig
index 235c7a2..ef2f97d 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1556,6 +1556,13 @@ config BPF_SYSCALL
 	  Enable the bpf() system call that allows to manipulate eBPF
 	  programs and maps via file descriptors.
 
+config BPF_JIT_ALWAYS_ON
+	bool "Permanently enable BPF JIT and remove BPF interpreter"
+	depends on BPF_SYSCALL && HAVE_EBPF_JIT && BPF_JIT
+	help
+	  Enables BPF JIT and removes BPF interpreter to avoid
+	  speculative execution of BPF instructions by the interpreter
+
 config SHMEM
 	bool "Use full shmem filesystem" if EXPERT
 	default y
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index e54ea31..c40e25e 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -256,6 +256,7 @@ noinline u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
 }
 EXPORT_SYMBOL_GPL(__bpf_call_base);
 
+#ifndef CONFIG_BPF_JIT_ALWAYS_ON
 /**
  *	__bpf_prog_run - run eBPF program on a given context
  *	@ctx: is the data we are operating on
@@ -725,6 +726,13 @@ static unsigned int __bpf_prog_run(void *ctx, const struct bpf_insn *insn)
 		return 0;
 }
 
+#else
+static unsigned int __bpf_prog_ret0(void *ctx, const struct bpf_insn *insn)
+{
+	return 0;
+}
+#endif
+
 bool bpf_prog_array_compatible(struct bpf_array *array,
 			       const struct bpf_prog *fp)
 {
@@ -771,9 +779,23 @@ static int bpf_check_tail_call(const struct bpf_prog *fp)
  */
 int bpf_prog_select_runtime(struct bpf_prog *fp)
 {
+#ifndef CONFIG_BPF_JIT_ALWAYS_ON
 	fp->bpf_func = (void *) __bpf_prog_run;
-
+#else
+	fp->bpf_func = (void *) __bpf_prog_ret0;
+#endif
+
+	/* eBPF JITs can rewrite the program in case constant
+	 * blinding is active. However, in case of error during
+	 * blinding, bpf_int_jit_compile() must always return a
+	 * valid program, which in this case would simply not
+	 * be JITed, but falls back to the interpreter.
+	 */
 	bpf_int_jit_compile(fp);
+#ifdef CONFIG_BPF_JIT_ALWAYS_ON
+	if (!fp->jited)
+		return -ENOTSUPP;
+#endif
 	bpf_prog_lock_ro(fp);
 
 	/* The tail call compatibility check can only be done at
diff --git a/lib/test_bpf.c b/lib/test_bpf.c
index 7e26aea..b7908d9 100644
--- a/lib/test_bpf.c
+++ b/lib/test_bpf.c
@@ -5304,9 +5304,8 @@ static struct bpf_prog *generate_filter(int which, int *err)
 				return NULL;
 			}
 		}
-		/* We don't expect to fail. */
 		if (*err) {
-			pr_cont("FAIL to attach err=%d len=%d\n",
+			pr_cont("FAIL to prog_create err=%d len=%d\n",
 				*err, fprog.len);
 			return NULL;
 		}
@@ -5325,7 +5324,11 @@ static struct bpf_prog *generate_filter(int which, int *err)
 		fp->type = BPF_PROG_TYPE_SOCKET_FILTER;
 		memcpy(fp->insnsi, fptr, fp->len * sizeof(struct bpf_insn));
 
-		bpf_prog_select_runtime(fp);
+		*err = bpf_prog_select_runtime(fp);
+		if (*err) {
+			pr_cont("FAIL to select_runtime err=%d\n", *err);
+			return NULL;
+		}
 		break;
 	}
 
@@ -5511,8 +5514,8 @@ static __init int test_bpf(void)
 				pass_cnt++;
 				continue;
 			}
-
-			return err;
+			err_cnt++;
+			continue;
 		}
 
 		pr_cont("jited:%u ", fp->jited);
diff --git a/net/Kconfig b/net/Kconfig
index 127da94..6d94140 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -388,3 +388,6 @@ endif   # if NET
 # Used by archs to tell that they support BPF_JIT
 config HAVE_BPF_JIT
 	bool
+
+config HAVE_EBPF_JIT
+	bool
diff --git a/net/core/filter.c b/net/core/filter.c
index e943554..229bf08 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -984,7 +984,9 @@ static struct bpf_prog *bpf_migrate_filter(struct bpf_prog *fp)
 		 */
 		goto out_err_free;
 
-	bpf_prog_select_runtime(fp);
+	err = bpf_prog_select_runtime(fp);
+	if (err)
+		goto out_err_free;
 
 	kfree(old_prog);
 	return fp;
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index f5ef211..6578a0a 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -292,7 +292,13 @@ static struct ctl_table net_core_table[] = {
 		.data		= &bpf_jit_enable,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
+#ifndef CONFIG_BPF_JIT_ALWAYS_ON
 		.proc_handler	= proc_dointvec
+#else
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &one,
+		.extra2		= &one,
+#endif
 	},
 #endif
 	{
diff --git a/net/socket.c b/net/socket.c
index 2cf4f25..5b31e5b 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -2534,6 +2534,15 @@ static int __init sock_init(void)
 
 core_initcall(sock_init);	/* early initcall */
 
+static int __init jit_init(void)
+{
+#ifdef CONFIG_BPF_JIT_ALWAYS_ON
+	bpf_jit_enable = 1;
+#endif
+	return 0;
+}
+pure_initcall(jit_init);
+
 #ifdef CONFIG_PROC_FS
 void socket_seq_show(struct seq_file *seq)
 {
-- 
2.9.5

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH stable 4.4 5/9] bpf: arsh is not supported in 32 bit alu thus reject it
  2018-01-30  2:37 [PATCH stable 4.4 0/9] BPF stable patches Daniel Borkmann
                   ` (3 preceding siblings ...)
  2018-01-30  2:37 ` [PATCH stable 4.4 4/9] bpf: introduce BPF_JIT_ALWAYS_ON config Daniel Borkmann
@ 2018-01-30  2:37 ` Daniel Borkmann
  2018-02-01  8:07   ` Patch "bpf: arsh is not supported in 32 bit alu thus reject it" has been added to the 4.4-stable tree gregkh
  2018-01-30  2:37 ` [PATCH stable 4.4 6/9] bpf: avoid false sharing of map refcount with max_entries Daniel Borkmann
                   ` (4 subsequent siblings)
  9 siblings, 1 reply; 22+ messages in thread
From: Daniel Borkmann @ 2018-01-30  2:37 UTC (permalink / raw)
  To: gregkh; +Cc: ast, daniel, stable

[ upstream commit 7891a87efc7116590eaba57acc3c422487802c6f ]

The following snippet was throwing an 'unknown opcode cc' warning
in BPF interpreter:

  0: (18) r0 = 0x0
  2: (7b) *(u64 *)(r10 -16) = r0
  3: (cc) (u32) r0 s>>= (u32) r0
  4: (95) exit

Although a number of JITs do support BPF_ALU | BPF_ARSH | BPF_{K,X}
generation, not all of them do, and neither does the interpreter. We
can leave the existing ones and implement it later in bpf-next for the
remaining ones, but reject this properly in the verifier for the time
being.
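
A hedged example (not from the patch) of what the verifier now rejects with
-EINVAL, using the insn macros from include/linux/filter.h:

  BPF_ALU32_REG(BPF_ARSH, BPF_REG_0, BPF_REG_0),  /* 32-bit arsh: rejected */
  BPF_ALU64_REG(BPF_ARSH, BPF_REG_0, BPF_REG_0),  /* 64-bit arsh: still allowed */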

Fixes: 17a5267067f3 ("bpf: verifier (add verifier core)")
Reported-by: syzbot+93c4904c5c70348a6890@syzkaller.appspotmail.com
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 kernel/bpf/verifier.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index a626797..32af151 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1165,6 +1165,11 @@ static int check_alu_op(struct verifier_env *env, struct bpf_insn *insn)
 			return -EINVAL;
 		}
 
+		if (opcode == BPF_ARSH && BPF_CLASS(insn->code) != BPF_ALU64) {
+			verbose("BPF_ARSH not supported for 32 bit ALU\n");
+			return -EINVAL;
+		}
+
 		if ((opcode == BPF_LSH || opcode == BPF_RSH ||
 		     opcode == BPF_ARSH) && BPF_SRC(insn->code) == BPF_K) {
 			int size = BPF_CLASS(insn->code) == BPF_ALU64 ? 64 : 32;
-- 
2.9.5

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH stable 4.4 6/9] bpf: avoid false sharing of map refcount with max_entries
  2018-01-30  2:37 [PATCH stable 4.4 0/9] BPF stable patches Daniel Borkmann
                   ` (4 preceding siblings ...)
  2018-01-30  2:37 ` [PATCH stable 4.4 5/9] bpf: arsh is not supported in 32 bit alu thus reject it Daniel Borkmann
@ 2018-01-30  2:37 ` Daniel Borkmann
  2018-02-01  8:07   ` Patch "bpf: avoid false sharing of map refcount with max_entries" has been added to the 4.4-stable tree gregkh
  2018-01-30  2:37 ` [PATCH stable 4.4 7/9] bpf: fix divides by zero Daniel Borkmann
                   ` (3 subsequent siblings)
  9 siblings, 1 reply; 22+ messages in thread
From: Daniel Borkmann @ 2018-01-30  2:37 UTC (permalink / raw)
  To: gregkh; +Cc: ast, daniel, stable

[ upstream commit be95a845cc4402272994ce290e3ad928aff06cb9 ]

In addition to commit b2157399cc98 ("bpf: prevent out-of-bounds
speculation") also change the layout of struct bpf_map such that
false sharing of fast-path members like max_entries is avoided
when the map's reference counter is altered. Therefore enforce
them to be placed into separate cachelines.

pahole dump after change:

  struct bpf_map {
        const struct bpf_map_ops  * ops;                 /*     0     8 */
        struct bpf_map *           inner_map_meta;       /*     8     8 */
        void *                     security;             /*    16     8 */
        enum bpf_map_type          map_type;             /*    24     4 */
        u32                        key_size;             /*    28     4 */
        u32                        value_size;           /*    32     4 */
        u32                        max_entries;          /*    36     4 */
        u32                        map_flags;            /*    40     4 */
        u32                        pages;                /*    44     4 */
        u32                        id;                   /*    48     4 */
        int                        numa_node;            /*    52     4 */
        bool                       unpriv_array;         /*    56     1 */

        /* XXX 7 bytes hole, try to pack */

        /* --- cacheline 1 boundary (64 bytes) --- */
        struct user_struct *       user;                 /*    64     8 */
        atomic_t                   refcnt;               /*    72     4 */
        atomic_t                   usercnt;              /*    76     4 */
        struct work_struct         work;                 /*    80    32 */
        char                       name[16];             /*   112    16 */
        /* --- cacheline 2 boundary (128 bytes) --- */

        /* size: 128, cachelines: 2, members: 17 */
        /* sum members: 121, holes: 1, sum holes: 7 */
  };

Now all entries in the first cacheline are read-only throughout
the lifetime of the map, set up once during map creation. Overall
struct size and number of cachelines don't change from the
reordering. struct bpf_map is usually the first member and embedded
in map structs in specific map implementations, so also avoid having
those members sit at the end where they could potentially share the
cacheline with the first map values, e.g. in the array, since remote
CPUs could trigger map updates just as well for those (easily
dirtying members like max_entries intentionally as well) while
having subsequent values in cache.

Quoting from Google's Project Zero blog [1]:

  Additionally, at least on the Intel machine on which this was
  tested, bouncing modified cache lines between cores is slow,
  apparently because the MESI protocol is used for cache coherence
  [8]. Changing the reference counter of an eBPF array on one
  physical CPU core causes the cache line containing the reference
  counter to be bounced over to that CPU core, making reads of the
  reference counter on all other CPU cores slow until the changed
  reference counter has been written back to memory. Because the
  length and the reference counter of an eBPF array are stored in
  the same cache line, this also means that changing the reference
  counter on one physical CPU core causes reads of the eBPF array's
  length to be slow on other physical CPU cores (intentional false
  sharing).

While this doesn't 'control' the out-of-bounds speculation through
masking the index as in commit b2157399cc98, triggering a manipulation
of the map's reference counter is really trivial, so let's not allow
max_entries to be easily affected through it.

Splitting to separate cachelines also generally makes sense from
a performance perspective anyway in that fast-path won't have a
cache miss if the map gets pinned, reused in other progs, etc out
of control path, thus also avoids unintentional false sharing.

  [1] https://googleprojectzero.blogspot.ch/2018/01/reading-privileged-memory-with-side.html
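
A hedged sketch (not part of the patch) of how the layout goal could be
asserted at build time, assuming SMP_CACHE_BYTES from <linux/cache.h>:

  /* refcnt must not share a cacheline with the fast-path max_entries */
  BUILD_BUG_ON(offsetof(struct bpf_map, refcnt) / SMP_CACHE_BYTES ==
               offsetof(struct bpf_map, max_entries) / SMP_CACHE_BYTES);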

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/bpf.h | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index f215715..132585a 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -31,17 +31,25 @@ struct bpf_map_ops {
 };
 
 struct bpf_map {
-	atomic_t refcnt;
+	/* 1st cacheline with read-mostly members of which some
+	 * are also accessed in fast-path (e.g. ops, max_entries).
+	 */
+	const struct bpf_map_ops *ops ____cacheline_aligned;
 	enum bpf_map_type map_type;
 	u32 key_size;
 	u32 value_size;
 	u32 max_entries;
 	u32 pages;
 	bool unpriv_array;
-	struct user_struct *user;
-	const struct bpf_map_ops *ops;
-	struct work_struct work;
+	/* 7 bytes hole */
+
+	/* 2nd cacheline with misc members to avoid false sharing
+	 * particularly with refcounting.
+	 */
+	struct user_struct *user ____cacheline_aligned;
+	atomic_t refcnt;
 	atomic_t usercnt;
+	struct work_struct work;
 };
 
 struct bpf_map_type_list {
-- 
2.9.5

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH stable 4.4 7/9] bpf: fix divides by zero
  2018-01-30  2:37 [PATCH stable 4.4 0/9] BPF stable patches Daniel Borkmann
                   ` (5 preceding siblings ...)
  2018-01-30  2:37 ` [PATCH stable 4.4 6/9] bpf: avoid false sharing of map refcount with max_entries Daniel Borkmann
@ 2018-01-30  2:37 ` Daniel Borkmann
  2018-02-01  8:07   ` Patch "bpf: fix divides by zero" has been added to the 4.4-stable tree gregkh
  2018-01-30  2:37 ` [PATCH stable 4.4 8/9] bpf: fix 32-bit divide by zero Daniel Borkmann
                   ` (2 subsequent siblings)
  9 siblings, 1 reply; 22+ messages in thread
From: Daniel Borkmann @ 2018-01-30  2:37 UTC (permalink / raw)
  To: gregkh; +Cc: ast, daniel, stable, Eric Dumazet

From: Eric Dumazet <edumazet@google.com>

[ upstream commit c366287ebd698ef5e3de300d90cd62ee9ee7373e ]

Divides by zero are not nice, let's avoid them if possible.

Also, do_div() does not seem needed when dealing with 32-bit operands,
but this is a minor detail.
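
A hedged sketch (not from the patch) of the case the (u32) cast closes,
using the interpreter's SRC naming:

  u64 SRC = 0x100000000ULL;  /* non-zero as u64, but (u32)SRC == 0 */

  /* old check: SRC == 0 is false, execution continues and
   * do_div(tmp, (u32) SRC) divides by zero;
   * new check: (u32)SRC == 0 is true, the program returns 0 instead
   */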

Fixes: bd4cf0ed331a ("net: filter: rework/optimize internal BPF interpreter's instruction set")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: syzbot <syzkaller@googlegroups.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 kernel/bpf/core.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index c40e25e..eb52d11 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -444,7 +444,7 @@ static unsigned int __bpf_prog_run(void *ctx, const struct bpf_insn *insn)
 		DST = tmp;
 		CONT;
 	ALU_MOD_X:
-		if (unlikely(SRC == 0))
+		if (unlikely((u32)SRC == 0))
 			return 0;
 		tmp = (u32) DST;
 		DST = do_div(tmp, (u32) SRC);
@@ -463,7 +463,7 @@ static unsigned int __bpf_prog_run(void *ctx, const struct bpf_insn *insn)
 		DST = div64_u64(DST, SRC);
 		CONT;
 	ALU_DIV_X:
-		if (unlikely(SRC == 0))
+		if (unlikely((u32)SRC == 0))
 			return 0;
 		tmp = (u32) DST;
 		do_div(tmp, (u32) SRC);
-- 
2.9.5

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH stable 4.4 8/9] bpf: fix 32-bit divide by zero
  2018-01-30  2:37 [PATCH stable 4.4 0/9] BPF stable patches Daniel Borkmann
                   ` (6 preceding siblings ...)
  2018-01-30  2:37 ` [PATCH stable 4.4 7/9] bpf: fix divides by zero Daniel Borkmann
@ 2018-01-30  2:37 ` Daniel Borkmann
  2018-02-01  8:07   ` Patch "bpf: fix 32-bit divide by zero" has been added to the 4.4-stable tree gregkh
  2018-01-30  2:37 ` [PATCH stable 4.4 9/9] bpf: reject stores into ctx via st and xadd Daniel Borkmann
  2018-01-30  6:30 ` [PATCH stable 4.4 0/9] BPF stable patches Greg KH
  9 siblings, 1 reply; 22+ messages in thread
From: Daniel Borkmann @ 2018-01-30  2:37 UTC (permalink / raw)
  To: gregkh; +Cc: ast, daniel, stable

From: Alexei Starovoitov <ast@kernel.org>

[ upstream commit 68fda450a7df51cff9e5a4d4a4d9d0d5f2589153 ]

Due to some JITs doing the if (src_reg == 0) check in 64-bit mode
for div/mod operations, mask the upper 32 bits of the src register
before doing the check.
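
A hedged sketch (not from the patch) of the rewrite the verifier now emits
in front of a 32-bit div/mod by register, shown with insn macros and
hypothetical registers:

  BPF_MOV32_REG(BPF_REG_2, BPF_REG_2),           /* clears upper 32 bits of src */
  BPF_ALU32_REG(BPF_DIV, BPF_REG_1, BPF_REG_2),  /* JIT's 64-bit src_reg == 0 check
                                                  * now matches the 32-bit divisor */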

Fixes: 622582786c9e ("net: filter: x86: internal BPF JIT")
Fixes: 7a12b5031c6b ("sparc64: Add eBPF JIT.")
Reported-by: syzbot+48340bb518e88849e2e3@syzkaller.appspotmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 kernel/bpf/verifier.c | 18 ++++++++++++++++++
 net/core/filter.c     |  4 ++++
 2 files changed, 22 insertions(+)

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 32af151..c9156bd 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -2248,6 +2248,24 @@ static int fixup_bpf_calls(struct verifier_env *env)
 	int i, cnt, delta = 0;
 
 	for (i = 0; i < insn_cnt; i++, insn++) {
+		if (insn->code == (BPF_ALU | BPF_MOD | BPF_X) ||
+		    insn->code == (BPF_ALU | BPF_DIV | BPF_X)) {
+			/* due to JIT bugs clear upper 32-bits of src register
+			 * before div/mod operation
+			 */
+			insn_buf[0] = BPF_MOV32_REG(insn->src_reg, insn->src_reg);
+			insn_buf[1] = *insn;
+			cnt = 2;
+			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
+			if (!new_prog)
+				return -ENOMEM;
+
+			delta    += cnt - 1;
+			env->prog = prog = new_prog;
+			insn      = new_prog->insnsi + i + delta;
+			continue;
+		}
+
 		if (insn->code != (BPF_JMP | BPF_CALL))
 			continue;
 
diff --git a/net/core/filter.c b/net/core/filter.c
index 229bf08..1a9ded6 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -430,6 +430,10 @@ static int bpf_convert_filter(struct sock_filter *prog, int len,
 			    convert_bpf_extensions(fp, &insn))
 				break;
 
+			if (fp->code == (BPF_ALU | BPF_DIV | BPF_X) ||
+			    fp->code == (BPF_ALU | BPF_MOD | BPF_X))
+				*insn++ = BPF_MOV32_REG(BPF_REG_X, BPF_REG_X);
+
 			*insn = BPF_RAW_INSN(fp->code, BPF_REG_A, BPF_REG_X, 0, fp->k);
 			break;
 
-- 
2.9.5

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH stable 4.4 9/9] bpf: reject stores into ctx via st and xadd
  2018-01-30  2:37 [PATCH stable 4.4 0/9] BPF stable patches Daniel Borkmann
                   ` (7 preceding siblings ...)
  2018-01-30  2:37 ` [PATCH stable 4.4 8/9] bpf: fix 32-bit divide by zero Daniel Borkmann
@ 2018-01-30  2:37 ` Daniel Borkmann
  2018-02-01  8:07   ` Patch "bpf: reject stores into ctx via st and xadd" has been added to the 4.4-stable tree gregkh
  2018-01-30  6:30 ` [PATCH stable 4.4 0/9] BPF stable patches Greg KH
  9 siblings, 1 reply; 22+ messages in thread
From: Daniel Borkmann @ 2018-01-30  2:37 UTC (permalink / raw)
  To: gregkh; +Cc: ast, daniel, stable

[ upstream commit f37a8cb84cce18762e8f86a70bd6a49a66ab964c ]

Alexei found that the verifier does not reject stores into the
context via BPF_ST instead of BPF_STX. And while looking at it, we
also should not allow the XADD variant of BPF_STX.

The context rewriter only assumes either BPF_LDX_MEM- or
BPF_STX_MEM-type operations, thus reject anything other than that so
that the assumptions in the rewriter properly hold. Also add test
cases for the BPF selftests.
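
Two hedged examples (not from the patch) of stores the verifier now rejects,
assuming r1 still holds the context pointer at this point:

  BPF_ST_MEM(BPF_W, BPF_REG_1, offsetof(struct __sk_buff, mark), 0),
                                        /* BPF_ST into ctx: -EACCES */
  BPF_STX_XADD(BPF_W, BPF_REG_1, BPF_REG_2, offsetof(struct __sk_buff, mark)),
                                        /* BPF_XADD on ctx: -EACCES */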

Fixes: d691f9e8d440 ("bpf: allow programs to write to certain skb fields")
Reported-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 kernel/bpf/verifier.c | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index c9156bd..c140038 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -683,6 +683,13 @@ static bool is_pointer_value(struct verifier_env *env, int regno)
 	}
 }
 
+static bool is_ctx_reg(struct verifier_env *env, int regno)
+{
+	const struct reg_state *reg = &env->cur_state.regs[regno];
+
+	return reg->type == PTR_TO_CTX;
+}
+
 /* check whether memory at (regno + off) is accessible for t = (read | write)
  * if t==write, value_regno is a register which value is stored into memory
  * if t==read, value_regno is a register which will receive the value from memory
@@ -779,6 +786,12 @@ static int check_xadd(struct verifier_env *env, struct bpf_insn *insn)
 		return -EACCES;
 	}
 
+	if (is_ctx_reg(env, insn->dst_reg)) {
+		verbose("BPF_XADD stores into R%d context is not allowed\n",
+			insn->dst_reg);
+		return -EACCES;
+	}
+
 	/* check whether atomic_add can read the memory */
 	err = check_mem_access(env, insn->dst_reg, insn->off,
 			       BPF_SIZE(insn->code), BPF_READ, -1);
@@ -1909,6 +1922,12 @@ static int do_check(struct verifier_env *env)
 			if (err)
 				return err;
 
+			if (is_ctx_reg(env, insn->dst_reg)) {
+				verbose("BPF_ST stores into R%d context is not allowed\n",
+					insn->dst_reg);
+				return -EACCES;
+			}
+
 			/* check that memory (dst_reg + off) is writeable */
 			err = check_mem_access(env, insn->dst_reg, insn->off,
 					       BPF_SIZE(insn->code), BPF_WRITE,
-- 
2.9.5

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* Re: [PATCH stable 4.4 0/9] BPF stable patches
  2018-01-30  2:37 [PATCH stable 4.4 0/9] BPF stable patches Daniel Borkmann
                   ` (8 preceding siblings ...)
  2018-01-30  2:37 ` [PATCH stable 4.4 9/9] bpf: reject stores into ctx via st and xadd Daniel Borkmann
@ 2018-01-30  6:30 ` Greg KH
  2018-01-30 10:29   ` Daniel Borkmann
  9 siblings, 1 reply; 22+ messages in thread
From: Greg KH @ 2018-01-30  6:30 UTC (permalink / raw)
  To: Daniel Borkmann; +Cc: ast, stable

On Tue, Jan 30, 2018 at 03:37:37AM +0100, Daniel Borkmann wrote:
> All for 4.4 backported and (limited) testing.

Many thanks for these, I'll queue them up later this week after this
latest kernel gets released tomorrow.

greg k-h

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH stable 4.4 0/9] BPF stable patches
  2018-01-30  6:30 ` [PATCH stable 4.4 0/9] BPF stable patches Greg KH
@ 2018-01-30 10:29   ` Daniel Borkmann
  2018-02-01  8:07     ` Greg KH
  0 siblings, 1 reply; 22+ messages in thread
From: Daniel Borkmann @ 2018-01-30 10:29 UTC (permalink / raw)
  To: Greg KH; +Cc: ast, stable

On 01/30/2018 07:30 AM, Greg KH wrote:
> On Tue, Jan 30, 2018 at 03:37:37AM +0100, Daniel Borkmann wrote:
>> All for 4.4 backported and (limited) testing.
> 
> Many thanks for these, I'll queue them up later this week after this
> latest kernel gets released tomorrow.

Great, thanks a lot Greg!

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Patch "bpf: arsh is not supported in 32 bit alu thus reject it" has been added to the 4.4-stable tree
  2018-01-30  2:37 ` [PATCH stable 4.4 5/9] bpf: arsh is not supported in 32 bit alu thus reject it Daniel Borkmann
@ 2018-02-01  8:07   ` gregkh
  0 siblings, 0 replies; 22+ messages in thread
From: gregkh @ 2018-02-01  8:07 UTC (permalink / raw)
  To: daniel, ast, gregkh; +Cc: stable, stable-commits


This is a note to let you know that I've just added the patch titled

    bpf: arsh is not supported in 32 bit alu thus reject it

to the 4.4-stable tree which can be found at:
    http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=summary

The filename of the patch is:
     bpf-arsh-is-not-supported-in-32-bit-alu-thus-reject-it.patch
and it can be found in the queue-4.4 subdirectory.

If you, or anyone else, feels it should not be added to the stable tree,
please let <stable@vger.kernel.org> know about it.


From foo@baz Thu Feb  1 09:05:44 CET 2018
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Tue, 30 Jan 2018 03:37:42 +0100
Subject: bpf: arsh is not supported in 32 bit alu thus reject it
To: gregkh@linuxfoundation.org
Cc: ast@kernel.org, daniel@iogearbox.net, stable@vger.kernel.org
Message-ID: <60932351924d42bf28628b0a01a693602cc0d9b9.1517279268.git.daniel@iogearbox.net>

From: Daniel Borkmann <daniel@iogearbox.net>

[ upstream commit 7891a87efc7116590eaba57acc3c422487802c6f ]

The following snippet was throwing an 'unknown opcode cc' warning
in BPF interpreter:

  0: (18) r0 = 0x0
  2: (7b) *(u64 *)(r10 -16) = r0
  3: (cc) (u32) r0 s>>= (u32) r0
  4: (95) exit

Although a number of JITs do support BPF_ALU | BPF_ARSH | BPF_{K,X}
generation, not all of them do, and neither does the interpreter. We
can leave the existing ones and implement it later in bpf-next for the
remaining ones, but reject this properly in the verifier for the time
being.

Fixes: 17a5267067f3 ("bpf: verifier (add verifier core)")
Reported-by: syzbot+93c4904c5c70348a6890@syzkaller.appspotmail.com
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 kernel/bpf/verifier.c |    5 +++++
 1 file changed, 5 insertions(+)

--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1165,6 +1165,11 @@ static int check_alu_op(struct verifier_
 			return -EINVAL;
 		}
 
+		if (opcode == BPF_ARSH && BPF_CLASS(insn->code) != BPF_ALU64) {
+			verbose("BPF_ARSH not supported for 32 bit ALU\n");
+			return -EINVAL;
+		}
+
 		if ((opcode == BPF_LSH || opcode == BPF_RSH ||
 		     opcode == BPF_ARSH) && BPF_SRC(insn->code) == BPF_K) {
 			int size = BPF_CLASS(insn->code) == BPF_ALU64 ? 64 : 32;


Patches currently in stable-queue which might be from daniel@iogearbox.net are

queue-4.4/bpf-fix-branch-pruning-logic.patch
queue-4.4/bpf-avoid-false-sharing-of-map-refcount-with-max_entries.patch
queue-4.4/x86-bpf_jit-small-optimization-in-emit_bpf_tail_call.patch
queue-4.4/bpf-reject-stores-into-ctx-via-st-and-xadd.patch
queue-4.4/bpf-fix-32-bit-divide-by-zero.patch
queue-4.4/bpf-fix-bpf_tail_call-x64-jit.patch
queue-4.4/bpf-arsh-is-not-supported-in-32-bit-alu-thus-reject-it.patch
queue-4.4/bpf-fix-divides-by-zero.patch
queue-4.4/bpf-introduce-bpf_jit_always_on-config.patch

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Patch "bpf: avoid false sharing of map refcount with max_entries" has been added to the 4.4-stable tree
  2018-01-30  2:37 ` [PATCH stable 4.4 6/9] bpf: avoid false sharing of map refcount with max_entries Daniel Borkmann
@ 2018-02-01  8:07   ` gregkh
  0 siblings, 0 replies; 22+ messages in thread
From: gregkh @ 2018-02-01  8:07 UTC (permalink / raw)
  To: daniel, ast, gregkh; +Cc: stable, stable-commits


This is a note to let you know that I've just added the patch titled

    bpf: avoid false sharing of map refcount with max_entries

to the 4.4-stable tree which can be found at:
    http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=summary

The filename of the patch is:
     bpf-avoid-false-sharing-of-map-refcount-with-max_entries.patch
and it can be found in the queue-4.4 subdirectory.

If you, or anyone else, feels it should not be added to the stable tree,
please let <stable@vger.kernel.org> know about it.


From foo@baz Thu Feb  1 09:05:44 CET 2018
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Tue, 30 Jan 2018 03:37:43 +0100
Subject: bpf: avoid false sharing of map refcount with max_entries
To: gregkh@linuxfoundation.org
Cc: ast@kernel.org, daniel@iogearbox.net, stable@vger.kernel.org
Message-ID: <6c5f91e38c952be4831f6764a92cedb7a48be095.1517279268.git.daniel@iogearbox.net>

From: Daniel Borkmann <daniel@iogearbox.net>

[ upstream commit be95a845cc4402272994ce290e3ad928aff06cb9 ]

In addition to commit b2157399cc98 ("bpf: prevent out-of-bounds
speculation") also change the layout of struct bpf_map such that
false sharing of fast-path members like max_entries is avoided
when the map's reference counter is altered. Therefore enforce
them to be placed into separate cachelines.

pahole dump after change:

  struct bpf_map {
        const struct bpf_map_ops  * ops;                 /*     0     8 */
        struct bpf_map *           inner_map_meta;       /*     8     8 */
        void *                     security;             /*    16     8 */
        enum bpf_map_type          map_type;             /*    24     4 */
        u32                        key_size;             /*    28     4 */
        u32                        value_size;           /*    32     4 */
        u32                        max_entries;          /*    36     4 */
        u32                        map_flags;            /*    40     4 */
        u32                        pages;                /*    44     4 */
        u32                        id;                   /*    48     4 */
        int                        numa_node;            /*    52     4 */
        bool                       unpriv_array;         /*    56     1 */

        /* XXX 7 bytes hole, try to pack */

        /* --- cacheline 1 boundary (64 bytes) --- */
        struct user_struct *       user;                 /*    64     8 */
        atomic_t                   refcnt;               /*    72     4 */
        atomic_t                   usercnt;              /*    76     4 */
        struct work_struct         work;                 /*    80    32 */
        char                       name[16];             /*   112    16 */
        /* --- cacheline 2 boundary (128 bytes) --- */

        /* size: 128, cachelines: 2, members: 17 */
        /* sum members: 121, holes: 1, sum holes: 7 */
  };

Now all entries in the first cacheline are read-only throughout
the lifetime of the map, set up once during map creation. Overall
struct size and number of cachelines don't change from the
reordering. struct bpf_map is usually the first member and embedded
in map structs in specific map implementations, so also avoid having
those members sit at the end where they could potentially share the
cacheline with the first map values, e.g. in the array, since remote
CPUs could trigger map updates just as well for those (easily
dirtying members like max_entries intentionally as well) while
having subsequent values in cache.

Quoting from Google's Project Zero blog [1]:

  Additionally, at least on the Intel machine on which this was
  tested, bouncing modified cache lines between cores is slow,
  apparently because the MESI protocol is used for cache coherence
  [8]. Changing the reference counter of an eBPF array on one
  physical CPU core causes the cache line containing the reference
  counter to be bounced over to that CPU core, making reads of the
  reference counter on all other CPU cores slow until the changed
  reference counter has been written back to memory. Because the
  length and the reference counter of an eBPF array are stored in
  the same cache line, this also means that changing the reference
  counter on one physical CPU core causes reads of the eBPF array's
  length to be slow on other physical CPU cores (intentional false
  sharing).

While this doesn't 'control' the out-of-bounds speculation through
masking the index as in commit b2157399cc98, triggering a manipulation
of the map's reference counter is really trivial, so let's not allow
max_entries to be easily affected through it.

Splitting to separate cachelines also generally makes sense from
a performance perspective anyway in that fast-path won't have a
cache miss if the map gets pinned, reused in other progs, etc out
of control path, thus also avoids unintentional false sharing.

  [1] https://googleprojectzero.blogspot.ch/2018/01/reading-privileged-memory-with-side.html

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/bpf.h |   16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -31,17 +31,25 @@ struct bpf_map_ops {
 };
 
 struct bpf_map {
-	atomic_t refcnt;
+	/* 1st cacheline with read-mostly members of which some
+	 * are also accessed in fast-path (e.g. ops, max_entries).
+	 */
+	const struct bpf_map_ops *ops ____cacheline_aligned;
 	enum bpf_map_type map_type;
 	u32 key_size;
 	u32 value_size;
 	u32 max_entries;
 	u32 pages;
 	bool unpriv_array;
-	struct user_struct *user;
-	const struct bpf_map_ops *ops;
-	struct work_struct work;
+	/* 7 bytes hole */
+
+	/* 2nd cacheline with misc members to avoid false sharing
+	 * particularly with refcounting.
+	 */
+	struct user_struct *user ____cacheline_aligned;
+	atomic_t refcnt;
 	atomic_t usercnt;
+	struct work_struct work;
 };
 
 struct bpf_map_type_list {


Patches currently in stable-queue which might be from daniel@iogearbox.net are

queue-4.4/bpf-fix-branch-pruning-logic.patch
queue-4.4/bpf-avoid-false-sharing-of-map-refcount-with-max_entries.patch
queue-4.4/x86-bpf_jit-small-optimization-in-emit_bpf_tail_call.patch
queue-4.4/bpf-reject-stores-into-ctx-via-st-and-xadd.patch
queue-4.4/bpf-fix-32-bit-divide-by-zero.patch
queue-4.4/bpf-fix-bpf_tail_call-x64-jit.patch
queue-4.4/bpf-arsh-is-not-supported-in-32-bit-alu-thus-reject-it.patch
queue-4.4/bpf-fix-divides-by-zero.patch
queue-4.4/bpf-introduce-bpf_jit_always_on-config.patch

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Patch "bpf: fix 32-bit divide by zero" has been added to the 4.4-stable tree
  2018-01-30  2:37 ` [PATCH stable 4.4 8/9] bpf: fix 32-bit divide by zero Daniel Borkmann
@ 2018-02-01  8:07   ` gregkh
  0 siblings, 0 replies; 22+ messages in thread
From: gregkh @ 2018-02-01  8:07 UTC (permalink / raw)
  To: daniel, ast, gregkh; +Cc: stable, stable-commits


This is a note to let you know that I've just added the patch titled

    bpf: fix 32-bit divide by zero

to the 4.4-stable tree which can be found at:
    http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=summary

The filename of the patch is:
     bpf-fix-32-bit-divide-by-zero.patch
and it can be found in the queue-4.4 subdirectory.

If you, or anyone else, feels it should not be added to the stable tree,
please let <stable@vger.kernel.org> know about it.


From foo@baz Thu Feb  1 09:05:44 CET 2018
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Tue, 30 Jan 2018 03:37:45 +0100
Subject: bpf: fix 32-bit divide by zero
To: gregkh@linuxfoundation.org
Cc: ast@kernel.org, daniel@iogearbox.net, stable@vger.kernel.org
Message-ID: <7e8a78250e8cf1f486b4cdb005e3ff313b992816.1517279268.git.daniel@iogearbox.net>

From: Alexei Starovoitov <ast@kernel.org>

[ upstream commit 68fda450a7df51cff9e5a4d4a4d9d0d5f2589153 ]

Due to some JITs doing the if (src_reg == 0) check in 64-bit mode
for div/mod operations, mask the upper 32 bits of the src register
before doing the check.

Fixes: 622582786c9e ("net: filter: x86: internal BPF JIT")
Fixes: 7a12b5031c6b ("sparc64: Add eBPF JIT.")
Reported-by: syzbot+48340bb518e88849e2e3@syzkaller.appspotmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 kernel/bpf/verifier.c |   18 ++++++++++++++++++
 net/core/filter.c     |    4 ++++
 2 files changed, 22 insertions(+)

--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -2248,6 +2248,24 @@ static int fixup_bpf_calls(struct verifi
 	int i, cnt, delta = 0;
 
 	for (i = 0; i < insn_cnt; i++, insn++) {
+		if (insn->code == (BPF_ALU | BPF_MOD | BPF_X) ||
+		    insn->code == (BPF_ALU | BPF_DIV | BPF_X)) {
+			/* due to JIT bugs clear upper 32-bits of src register
+			 * before div/mod operation
+			 */
+			insn_buf[0] = BPF_MOV32_REG(insn->src_reg, insn->src_reg);
+			insn_buf[1] = *insn;
+			cnt = 2;
+			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
+			if (!new_prog)
+				return -ENOMEM;
+
+			delta    += cnt - 1;
+			env->prog = prog = new_prog;
+			insn      = new_prog->insnsi + i + delta;
+			continue;
+		}
+
 		if (insn->code != (BPF_JMP | BPF_CALL))
 			continue;
 
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -430,6 +430,10 @@ do_pass:
 			    convert_bpf_extensions(fp, &insn))
 				break;
 
+			if (fp->code == (BPF_ALU | BPF_DIV | BPF_X) ||
+			    fp->code == (BPF_ALU | BPF_MOD | BPF_X))
+				*insn++ = BPF_MOV32_REG(BPF_REG_X, BPF_REG_X);
+
 			*insn = BPF_RAW_INSN(fp->code, BPF_REG_A, BPF_REG_X, 0, fp->k);
 			break;
 


Patches currently in stable-queue which might be from daniel@iogearbox.net are

queue-4.4/bpf-fix-branch-pruning-logic.patch
queue-4.4/bpf-avoid-false-sharing-of-map-refcount-with-max_entries.patch
queue-4.4/x86-bpf_jit-small-optimization-in-emit_bpf_tail_call.patch
queue-4.4/bpf-reject-stores-into-ctx-via-st-and-xadd.patch
queue-4.4/bpf-fix-32-bit-divide-by-zero.patch
queue-4.4/bpf-fix-bpf_tail_call-x64-jit.patch
queue-4.4/bpf-arsh-is-not-supported-in-32-bit-alu-thus-reject-it.patch
queue-4.4/bpf-fix-divides-by-zero.patch
queue-4.4/bpf-introduce-bpf_jit_always_on-config.patch

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Patch "bpf: fix bpf_tail_call() x64 JIT" has been added to the 4.4-stable tree
  2018-01-30  2:37 ` [PATCH stable 4.4 3/9] bpf: fix bpf_tail_call() x64 JIT Daniel Borkmann
@ 2018-02-01  8:07   ` gregkh
  0 siblings, 0 replies; 22+ messages in thread
From: gregkh @ 2018-02-01  8:07 UTC (permalink / raw)
  To: daniel, ast, ast, davem, edumazet, gregkh, kafai; +Cc: stable, stable-commits


This is a note to let you know that I've just added the patch titled

    bpf: fix bpf_tail_call() x64 JIT

to the 4.4-stable tree which can be found at:
    http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=summary

The filename of the patch is:
     bpf-fix-bpf_tail_call-x64-jit.patch
and it can be found in the queue-4.4 subdirectory.

If you, or anyone else, feels it should not be added to the stable tree,
please let <stable@vger.kernel.org> know about it.


From foo@baz Thu Feb  1 09:05:44 CET 2018
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Tue, 30 Jan 2018 03:37:40 +0100
Subject: bpf: fix bpf_tail_call() x64 JIT
To: gregkh@linuxfoundation.org
Cc: ast@kernel.org, daniel@iogearbox.net, stable@vger.kernel.org, Alexei Starovoitov <ast@fb.com>, "David S . Miller" <davem@davemloft.net>
Message-ID: <1d696e8c8bf884fb67aca8fe4ab8ba132b8a2ed1.1517279268.git.daniel@iogearbox.net>

From: Alexei Starovoitov <ast@fb.com>

[ upstream commit 90caccdd8cc0215705f18b92771b449b01e2474a ]

- bpf prog_array, just like all other types of bpf array, accepts a 32-bit index.
  Clarify that in the comment.
- fix x64 JIT of bpf_tail_call which was incorrectly loading 8 instead of 4 bytes
- tighten the corresponding check in the interpreter to stay consistent

The JIT bug can be triggered after the introduction of the BPF_F_NUMA_NODE flag
in commit 96eabe7a40aa in 4.14. Before that the map_flags would stay zero and
though the JIT code is wrong it will still check bounds correctly.
Hence the two Fixes tags. All other JITs don't have this problem.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Fixes: 96eabe7a40aa ("bpf: Allow selecting numa node during map creation")
Fixes: b52f00e6a715 ("x86: bpf_jit: implement bpf_tail_call() helper")
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/net/bpf_jit_comp.c |    4 ++--
 kernel/bpf/core.c           |    2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -266,9 +266,9 @@ static void emit_bpf_tail_call(u8 **ppro
 	/* if (index >= array->map.max_entries)
 	 *   goto out;
 	 */
-	EMIT4(0x48, 0x8B, 0x46,                   /* mov rax, qword ptr [rsi + 16] */
+	EMIT2(0x89, 0xD2);                        /* mov edx, edx */
+	EMIT3(0x39, 0x56,                         /* cmp dword ptr [rsi + 16], edx */
 	      offsetof(struct bpf_array, map.max_entries));
-	EMIT3(0x48, 0x39, 0xD0);                  /* cmp rax, rdx */
 #define OFFSET1 43 /* number of bytes to jump */
 	EMIT2(X86_JBE, OFFSET1);                  /* jbe out */
 	label1 = cnt;
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -517,7 +517,7 @@ select_insn:
 		struct bpf_map *map = (struct bpf_map *) (unsigned long) BPF_R2;
 		struct bpf_array *array = container_of(map, struct bpf_array, map);
 		struct bpf_prog *prog;
-		u64 index = BPF_R3;
+		u32 index = BPF_R3;
 
 		if (unlikely(index >= array->map.max_entries))
 			goto out;

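For illustration, a minimal userspace C sketch of the 32- vs 64-bit comparison the
interpreter hunk above is about; the variable names are invented and this is not
kernel code:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            /* A 64-bit index with garbage in the upper half: prog_array only
             * looks at the low 32 bits, so the bounds check has to be done on
             * the truncated value, matching "u32 index = BPF_R3" above.
             */
            uint64_t r3 = 0x100000000ULL;
            uint32_t max_entries = 4;

            printf("64-bit compare: %s\n",
                   r3 >= max_entries ? "out of bounds" : "in bounds");
            printf("32-bit compare: %s\n",
                   (uint32_t)r3 >= max_entries ? "out of bounds" : "in bounds");
            return 0;
    }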

Patches currently in stable-queue which might be from daniel@iogearbox.net are

queue-4.4/bpf-fix-branch-pruning-logic.patch
queue-4.4/bpf-avoid-false-sharing-of-map-refcount-with-max_entries.patch
queue-4.4/x86-bpf_jit-small-optimization-in-emit_bpf_tail_call.patch
queue-4.4/bpf-reject-stores-into-ctx-via-st-and-xadd.patch
queue-4.4/bpf-fix-32-bit-divide-by-zero.patch
queue-4.4/bpf-fix-bpf_tail_call-x64-jit.patch
queue-4.4/bpf-arsh-is-not-supported-in-32-bit-alu-thus-reject-it.patch
queue-4.4/bpf-fix-divides-by-zero.patch
queue-4.4/bpf-introduce-bpf_jit_always_on-config.patch

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH stable 4.4 0/9] BPF stable patches
  2018-01-30 10:29   ` Daniel Borkmann
@ 2018-02-01  8:07     ` Greg KH
  0 siblings, 0 replies; 22+ messages in thread
From: Greg KH @ 2018-02-01  8:07 UTC (permalink / raw)
  To: Daniel Borkmann; +Cc: ast, stable

On Tue, Jan 30, 2018 at 11:29:39AM +0100, Daniel Borkmann wrote:
> On 01/30/2018 07:30 AM, Greg KH wrote:
> > On Tue, Jan 30, 2018 at 03:37:37AM +0100, Daniel Borkmann wrote:
> >> All for 4.4 backported and (limited) testing.
> > 
> > Many thanks for these, I'll queue them up later this week after this
> > latest kernel gets released tomorrow.
> 
> Great, thanks a lot Greg!

All now queued up, thanks again for the backports.

greg k-h

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Patch "bpf: fix branch pruning logic" has been added to the 4.4-stable tree
  2018-01-30  2:37 ` [PATCH stable 4.4 1/9] bpf: fix branch pruning logic Daniel Borkmann
@ 2018-02-01  8:07   ` gregkh
  0 siblings, 0 replies; 22+ messages in thread
From: gregkh @ 2018-02-01  8:07 UTC (permalink / raw)
  To: daniel, ast, ast, gregkh; +Cc: stable, stable-commits


This is a note to let you know that I've just added the patch titled

    bpf: fix branch pruning logic

to the 4.4-stable tree which can be found at:
    http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=summary

The filename of the patch is:
     bpf-fix-branch-pruning-logic.patch
and it can be found in the queue-4.4 subdirectory.

If you, or anyone else, feels it should not be added to the stable tree,
please let <stable@vger.kernel.org> know about it.


>From foo@baz Thu Feb  1 09:05:44 CET 2018
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Tue, 30 Jan 2018 03:37:38 +0100
Subject: bpf: fix branch pruning logic
To: gregkh@linuxfoundation.org
Cc: ast@kernel.org, daniel@iogearbox.net, stable@vger.kernel.org
Message-ID: <c4bc7f798c0e99b4551d3423c294350fc8223714.1517279268.git.daniel@iogearbox.net>

From: Alexei Starovoitov <ast@fb.com>

[ Upstream commit c131187db2d3fa2f8bf32fdf4e9a4ef805168467 ]

when the verifier detects that register contains a runtime constant
and it's compared with another constant it will prune exploration
of the branch that is guaranteed not to be taken at runtime.
This is all correct, but malicious program may be constructed
in such a way that it always has a constant comparison and
the other branch is never taken under any conditions.
In this case such path through the program will not be explored
by the verifier. It won't be taken at run-time either, but since
all instructions are JITed the malicious program may cause JITs
to complain about using reserved fields, etc.
To fix the issue we have to track the instructions explored by
the verifier and sanitize instructions that are dead at run time
with NOPs. We cannot reject such dead code, since llvm generates
it for valid C code, since it doesn't do as much data flow
analysis as the verifier does.

Fixes: 17a5267067f3 ("bpf: verifier (add verifier core)")
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 kernel/bpf/verifier.c |   28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -191,6 +191,7 @@ struct bpf_insn_aux_data {
 		enum bpf_reg_type ptr_type;	/* pointer type for load/store insns */
 		struct bpf_map *map_ptr;	/* pointer for call insn into lookup_elem */
 	};
+	bool seen; /* this insn was processed by the verifier */
 };
 
 #define MAX_USED_MAPS 64 /* max number of maps accessed by one eBPF program */
@@ -1793,6 +1794,7 @@ static int do_check(struct verifier_env
 			print_bpf_insn(env, insn);
 		}
 
+		env->insn_aux_data[insn_idx].seen = true;
 		if (class == BPF_ALU || class == BPF_ALU64) {
 			err = check_alu_op(env, insn);
 			if (err)
@@ -1988,6 +1990,7 @@ process_bpf_exit:
 					return err;
 
 				insn_idx++;
+				env->insn_aux_data[insn_idx].seen = true;
 			} else {
 				verbose("invalid BPF_LD mode\n");
 				return -EINVAL;
@@ -2125,6 +2128,7 @@ static int adjust_insn_aux_data(struct v
 				u32 off, u32 cnt)
 {
 	struct bpf_insn_aux_data *new_data, *old_data = env->insn_aux_data;
+	int i;
 
 	if (cnt == 1)
 		return 0;
@@ -2134,6 +2138,8 @@ static int adjust_insn_aux_data(struct v
 	memcpy(new_data, old_data, sizeof(struct bpf_insn_aux_data) * off);
 	memcpy(new_data + off + cnt - 1, old_data + off,
 	       sizeof(struct bpf_insn_aux_data) * (prog_len - off - cnt + 1));
+	for (i = off; i < off + cnt - 1; i++)
+		new_data[i].seen = true;
 	env->insn_aux_data = new_data;
 	vfree(old_data);
 	return 0;
@@ -2152,6 +2158,25 @@ static struct bpf_prog *bpf_patch_insn_d
 	return new_prog;
 }
 
+/* The verifier does more data flow analysis than llvm and will not explore
+ * branches that are dead at run time. Malicious programs can have dead code
+ * too. Therefore replace all dead at-run-time code with nops.
+ */
+static void sanitize_dead_code(struct verifier_env *env)
+{
+	struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
+	struct bpf_insn nop = BPF_MOV64_REG(BPF_REG_0, BPF_REG_0);
+	struct bpf_insn *insn = env->prog->insnsi;
+	const int insn_cnt = env->prog->len;
+	int i;
+
+	for (i = 0; i < insn_cnt; i++) {
+		if (aux_data[i].seen)
+			continue;
+		memcpy(insn + i, &nop, sizeof(nop));
+	}
+}
+
 /* convert load instructions that access fields of 'struct __sk_buff'
  * into sequence of instructions that access fields of 'struct sk_buff'
  */
@@ -2371,6 +2396,9 @@ skip_full_check:
 	free_states(env);
 
 	if (ret == 0)
+		sanitize_dead_code(env);
+
+	if (ret == 0)
 		/* program is valid, convert *(u32*)(ctx + off) accesses */
 		ret = convert_ctx_accesses(env);
 

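As a sketch of the situation the patch handles, here is a hypothetical instruction
sequence built from the insn macros in include/linux/filter.h; the layout is
invented purely for illustration:

	struct bpf_insn prog[] = {
		BPF_MOV64_IMM(BPF_REG_0, 1),            /* 0: r0 = 1, a known constant   */
		BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2),  /* 1: if r0 != 1 goto +2 (never) */
		BPF_MOV64_IMM(BPF_REG_0, 0),            /* 2: explored by the verifier   */
		BPF_EXIT_INSN(),                        /* 3: explored by the verifier   */
		BPF_MOV64_IMM(BPF_REG_0, 2),            /* 4: pruned, never ->seen       */
		BPF_EXIT_INSN(),                        /* 5: pruned, never ->seen       */
	};

Insns 4-5 are only reachable through the branch the verifier proves is never
taken, so they stay unexplored; sanitize_dead_code() above rewrites them to
"r0 = r0" NOPs instead of handing unverified instructions to the JIT.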

Patches currently in stable-queue which might be from daniel@iogearbox.net are

queue-4.4/bpf-fix-branch-pruning-logic.patch
queue-4.4/bpf-avoid-false-sharing-of-map-refcount-with-max_entries.patch
queue-4.4/x86-bpf_jit-small-optimization-in-emit_bpf_tail_call.patch
queue-4.4/bpf-reject-stores-into-ctx-via-st-and-xadd.patch
queue-4.4/bpf-fix-32-bit-divide-by-zero.patch
queue-4.4/bpf-fix-bpf_tail_call-x64-jit.patch
queue-4.4/bpf-arsh-is-not-supported-in-32-bit-alu-thus-reject-it.patch
queue-4.4/bpf-fix-divides-by-zero.patch
queue-4.4/bpf-introduce-bpf_jit_always_on-config.patch

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Patch "bpf: fix divides by zero" has been added to the 4.4-stable tree
  2018-01-30  2:37 ` [PATCH stable 4.4 7/9] bpf: fix divides by zero Daniel Borkmann
@ 2018-02-01  8:07   ` gregkh
  0 siblings, 0 replies; 22+ messages in thread
From: gregkh @ 2018-02-01  8:07 UTC (permalink / raw)
  To: daniel, ast, edumazet, gregkh, syzkaller; +Cc: stable, stable-commits


This is a note to let you know that I've just added the patch titled

    bpf: fix divides by zero

to the 4.4-stable tree which can be found at:
    http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=summary

The filename of the patch is:
     bpf-fix-divides-by-zero.patch
and it can be found in the queue-4.4 subdirectory.

If you, or anyone else, feels it should not be added to the stable tree,
please let <stable@vger.kernel.org> know about it.


>From foo@baz Thu Feb  1 09:05:44 CET 2018
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Tue, 30 Jan 2018 03:37:44 +0100
Subject: bpf: fix divides by zero
To: gregkh@linuxfoundation.org
Cc: ast@kernel.org, daniel@iogearbox.net, stable@vger.kernel.org, Eric Dumazet <edumazet@google.com>
Message-ID: <cdf90cab74cbd2ef542fc5f982b55f423a52a5b5.1517279268.git.daniel@iogearbox.net>

From: Eric Dumazet <edumazet@google.com>

[ upstream commit c366287ebd698ef5e3de300d90cd62ee9ee7373e ]

Divides by zero are not nice, let's avoid them if possible.

Also, do_div() does not seem to be needed when dealing with 32-bit operands,
but that is a minor detail.

Fixes: bd4cf0ed331a ("net: filter: rework/optimize internal BPF interpreter's instruction set")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: syzbot <syzkaller@googlegroups.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 kernel/bpf/core.c |    4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -444,7 +444,7 @@ select_insn:
 		DST = tmp;
 		CONT;
 	ALU_MOD_X:
-		if (unlikely(SRC == 0))
+		if (unlikely((u32)SRC == 0))
 			return 0;
 		tmp = (u32) DST;
 		DST = do_div(tmp, (u32) SRC);
@@ -463,7 +463,7 @@ select_insn:
 		DST = div64_u64(DST, SRC);
 		CONT;
 	ALU_DIV_X:
-		if (unlikely(SRC == 0))
+		if (unlikely((u32)SRC == 0))
 			return 0;
 		tmp = (u32) DST;
 		do_div(tmp, (u32) SRC);

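A minimal userspace sketch of the case the (u32) casts guard against: a 64-bit
source register that is non-zero overall but whose low 32 bits are zero. The
names are invented and this is not kernel code:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            uint64_t src = 0xffffffff00000000ULL; /* SRC != 0, but (u32)SRC == 0 */
            uint64_t dst = 42;

            if (src == 0)               /* old check: not taken */
                    return 1;
            if ((uint32_t)src == 0) {   /* new check: catches it */
                    printf("32-bit divisor is zero, bailing out\n");
                    return 0;
            }
            /* without the cast in the check, this would divide by zero */
            printf("%u\n", (uint32_t)dst / (uint32_t)src);
            return 0;
    }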

Patches currently in stable-queue which might be from daniel@iogearbox.net are

queue-4.4/bpf-fix-branch-pruning-logic.patch
queue-4.4/bpf-avoid-false-sharing-of-map-refcount-with-max_entries.patch
queue-4.4/x86-bpf_jit-small-optimization-in-emit_bpf_tail_call.patch
queue-4.4/bpf-reject-stores-into-ctx-via-st-and-xadd.patch
queue-4.4/bpf-fix-32-bit-divide-by-zero.patch
queue-4.4/bpf-fix-bpf_tail_call-x64-jit.patch
queue-4.4/bpf-arsh-is-not-supported-in-32-bit-alu-thus-reject-it.patch
queue-4.4/bpf-fix-divides-by-zero.patch
queue-4.4/bpf-introduce-bpf_jit_always_on-config.patch

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Patch "bpf: introduce BPF_JIT_ALWAYS_ON config" has been added to the 4.4-stable tree
  2018-01-30  2:37 ` [PATCH stable 4.4 4/9] bpf: introduce BPF_JIT_ALWAYS_ON config Daniel Borkmann
@ 2018-02-01  8:07   ` gregkh
  0 siblings, 0 replies; 22+ messages in thread
From: gregkh @ 2018-02-01  8:07 UTC (permalink / raw)
  To: daniel, ast, gregkh; +Cc: stable, stable-commits


This is a note to let you know that I've just added the patch titled

    bpf: introduce BPF_JIT_ALWAYS_ON config

to the 4.4-stable tree which can be found at:
    http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=summary

The filename of the patch is:
     bpf-introduce-bpf_jit_always_on-config.patch
and it can be found in the queue-4.4 subdirectory.

If you, or anyone else, feels it should not be added to the stable tree,
please let <stable@vger.kernel.org> know about it.


>From foo@baz Thu Feb  1 09:05:44 CET 2018
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Tue, 30 Jan 2018 03:37:41 +0100
Subject: bpf: introduce BPF_JIT_ALWAYS_ON config
To: gregkh@linuxfoundation.org
Cc: ast@kernel.org, daniel@iogearbox.net, stable@vger.kernel.org
Message-ID: <8fa0284c9e3811cc7ae467dd3490da45ff76b46b.1517279268.git.daniel@iogearbox.net>

From: Alexei Starovoitov <ast@kernel.org>

[ upstream commit 290af86629b25ffd1ed6232c4e9107da031705cb ]

The BPF interpreter has been used as part of the Spectre 2 attack CVE-2017-5715.

A quote from the Google Project Zero blog:
"At this point, it would normally be necessary to locate gadgets in
the host kernel code that can be used to actually leak data by reading
from an attacker-controlled location, shifting and masking the result
appropriately and then using the result of that as offset to an
attacker-controlled address for a load. But piecing gadgets together
and figuring out which ones work in a speculation context seems annoying.
So instead, we decided to use the eBPF interpreter, which is built into
the host kernel - while there is no legitimate way to invoke it from inside
a VM, the presence of the code in the host kernel's text section is sufficient
to make it usable for the attack, just like with ordinary ROP gadgets."

To make the attacker's job harder, introduce a BPF_JIT_ALWAYS_ON config
option that removes the interpreter from the kernel in favor of JIT-only mode.
So far the eBPF JIT is supported by:
x64, arm64, arm32, sparc64, s390, powerpc64, mips64

The start of JITed program is randomized and code page is marked as read-only.
In addition "constant blinding" can be turned on with net.core.bpf_jit_harden

v2->v3:
- move __bpf_prog_ret0 under ifdef (Daniel)

v1->v2:
- fix init order, test_bpf and cBPF (Daniel's feedback)
- fix offloaded bpf (Jakub's feedback)
- add 'return 0' dummy in case something can invoke prog->bpf_func
- retarget bpf tree. For bpf-next the patch would need one extra hunk.
  It will be sent when the trees are merged back to net-next

Considered doing:
  int bpf_jit_enable __read_mostly = BPF_EBPF_JIT_DEFAULT;
but it seems better to land the patch as-is and, in bpf-next, remove the
bpf_jit_enable global variable from all JITs, consolidate it in one place
and remove this jit_init() function.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm64/Kconfig         |    1 +
 arch/s390/Kconfig          |    1 +
 arch/x86/Kconfig           |    1 +
 init/Kconfig               |    7 +++++++
 kernel/bpf/core.c          |   24 +++++++++++++++++++++++-
 lib/test_bpf.c             |   13 ++++++++-----
 net/Kconfig                |    3 +++
 net/core/filter.c          |    4 +++-
 net/core/sysctl_net_core.c |    6 ++++++
 net/socket.c               |    9 +++++++++
 10 files changed, 62 insertions(+), 7 deletions(-)

--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -54,6 +54,7 @@ config ARM64
 	select HAVE_ARCH_SECCOMP_FILTER
 	select HAVE_ARCH_TRACEHOOK
 	select HAVE_BPF_JIT
+	select HAVE_EBPF_JIT
 	select HAVE_C_RECORDMCOUNT
 	select HAVE_CC_STACKPROTECTOR
 	select HAVE_CMPXCHG_DOUBLE
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -123,6 +123,7 @@ config S390
 	select HAVE_ARCH_TRACEHOOK
 	select HAVE_ARCH_TRANSPARENT_HUGEPAGE
 	select HAVE_BPF_JIT if PACK_STACK && HAVE_MARCH_Z196_FEATURES
+	select HAVE_EBPF_JIT if PACK_STACK && HAVE_MARCH_Z196_FEATURES
 	select HAVE_CMPXCHG_DOUBLE
 	select HAVE_CMPXCHG_LOCAL
 	select HAVE_DEBUG_KMEMLEAK
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -88,6 +88,7 @@ config X86
 	select HAVE_ARCH_TRACEHOOK
 	select HAVE_ARCH_TRANSPARENT_HUGEPAGE
 	select HAVE_BPF_JIT			if X86_64
+	select HAVE_EBPF_JIT			if X86_64
 	select HAVE_CC_STACKPROTECTOR
 	select HAVE_CMPXCHG_DOUBLE
 	select HAVE_CMPXCHG_LOCAL
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1556,6 +1556,13 @@ config BPF_SYSCALL
 	  Enable the bpf() system call that allows to manipulate eBPF
 	  programs and maps via file descriptors.
 
+config BPF_JIT_ALWAYS_ON
+	bool "Permanently enable BPF JIT and remove BPF interpreter"
+	depends on BPF_SYSCALL && HAVE_EBPF_JIT && BPF_JIT
+	help
+	  Enables BPF JIT and removes BPF interpreter to avoid
+	  speculative execution of BPF instructions by the interpreter
+
 config SHMEM
 	bool "Use full shmem filesystem" if EXPERT
 	default y
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -256,6 +256,7 @@ noinline u64 __bpf_call_base(u64 r1, u64
 }
 EXPORT_SYMBOL_GPL(__bpf_call_base);
 
+#ifndef CONFIG_BPF_JIT_ALWAYS_ON
 /**
  *	__bpf_prog_run - run eBPF program on a given context
  *	@ctx: is the data we are operating on
@@ -725,6 +726,13 @@ load_byte:
 		return 0;
 }
 
+#else
+static unsigned int __bpf_prog_ret0(void *ctx, const struct bpf_insn *insn)
+{
+	return 0;
+}
+#endif
+
 bool bpf_prog_array_compatible(struct bpf_array *array,
 			       const struct bpf_prog *fp)
 {
@@ -771,9 +779,23 @@ static int bpf_check_tail_call(const str
  */
 int bpf_prog_select_runtime(struct bpf_prog *fp)
 {
+#ifndef CONFIG_BPF_JIT_ALWAYS_ON
 	fp->bpf_func = (void *) __bpf_prog_run;
-
+#else
+	fp->bpf_func = (void *) __bpf_prog_ret0;
+#endif
+
+	/* eBPF JITs can rewrite the program in case constant
+	 * blinding is active. However, in case of error during
+	 * blinding, bpf_int_jit_compile() must always return a
+	 * valid program, which in this case would simply not
+	 * be JITed, but falls back to the interpreter.
+	 */
 	bpf_int_jit_compile(fp);
+#ifdef CONFIG_BPF_JIT_ALWAYS_ON
+	if (!fp->jited)
+		return -ENOTSUPP;
+#endif
 	bpf_prog_lock_ro(fp);
 
 	/* The tail call compatibility check can only be done at
--- a/lib/test_bpf.c
+++ b/lib/test_bpf.c
@@ -5304,9 +5304,8 @@ static struct bpf_prog *generate_filter(
 				return NULL;
 			}
 		}
-		/* We don't expect to fail. */
 		if (*err) {
-			pr_cont("FAIL to attach err=%d len=%d\n",
+			pr_cont("FAIL to prog_create err=%d len=%d\n",
 				*err, fprog.len);
 			return NULL;
 		}
@@ -5325,7 +5324,11 @@ static struct bpf_prog *generate_filter(
 		fp->type = BPF_PROG_TYPE_SOCKET_FILTER;
 		memcpy(fp->insnsi, fptr, fp->len * sizeof(struct bpf_insn));
 
-		bpf_prog_select_runtime(fp);
+		*err = bpf_prog_select_runtime(fp);
+		if (*err) {
+			pr_cont("FAIL to select_runtime err=%d\n", *err);
+			return NULL;
+		}
 		break;
 	}
 
@@ -5511,8 +5514,8 @@ static __init int test_bpf(void)
 				pass_cnt++;
 				continue;
 			}
-
-			return err;
+			err_cnt++;
+			continue;
 		}
 
 		pr_cont("jited:%u ", fp->jited);
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -388,3 +388,6 @@ endif   # if NET
 # Used by archs to tell that they support BPF_JIT
 config HAVE_BPF_JIT
 	bool
+
+config HAVE_EBPF_JIT
+	bool
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -984,7 +984,9 @@ static struct bpf_prog *bpf_migrate_filt
 		 */
 		goto out_err_free;
 
-	bpf_prog_select_runtime(fp);
+	err = bpf_prog_select_runtime(fp);
+	if (err)
+		goto out_err_free;
 
 	kfree(old_prog);
 	return fp;
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -292,7 +292,13 @@ static struct ctl_table net_core_table[]
 		.data		= &bpf_jit_enable,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
+#ifndef CONFIG_BPF_JIT_ALWAYS_ON
 		.proc_handler	= proc_dointvec
+#else
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &one,
+		.extra2		= &one,
+#endif
 	},
 #endif
 	{
--- a/net/socket.c
+++ b/net/socket.c
@@ -2534,6 +2534,15 @@ out_fs:
 
 core_initcall(sock_init);	/* early initcall */
 
+static int __init jit_init(void)
+{
+#ifdef CONFIG_BPF_JIT_ALWAYS_ON
+	bpf_jit_enable = 1;
+#endif
+	return 0;
+}
+pure_initcall(jit_init);
+
 #ifdef CONFIG_PROC_FS
 void socket_seq_show(struct seq_file *seq)
 {

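A condensed sketch of the caller-side pattern the lib/test_bpf.c and
net/core/filter.c hunks switch to; the function name and cleanup call here are
illustrative, not the exact helpers used at every call site:

	static struct bpf_prog *select_or_free(struct bpf_prog *fp)
	{
		int err = bpf_prog_select_runtime(fp);

		if (err) {
			/* with BPF_JIT_ALWAYS_ON there is no interpreter
			 * fallback; an un-JITable program fails with -ENOTSUPP
			 */
			bpf_prog_free(fp);
			return ERR_PTR(err);
		}
		return fp;
	}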

Patches currently in stable-queue which might be from daniel@iogearbox.net are

queue-4.4/bpf-fix-branch-pruning-logic.patch
queue-4.4/bpf-avoid-false-sharing-of-map-refcount-with-max_entries.patch
queue-4.4/x86-bpf_jit-small-optimization-in-emit_bpf_tail_call.patch
queue-4.4/bpf-reject-stores-into-ctx-via-st-and-xadd.patch
queue-4.4/bpf-fix-32-bit-divide-by-zero.patch
queue-4.4/bpf-fix-bpf_tail_call-x64-jit.patch
queue-4.4/bpf-arsh-is-not-supported-in-32-bit-alu-thus-reject-it.patch
queue-4.4/bpf-fix-divides-by-zero.patch
queue-4.4/bpf-introduce-bpf_jit_always_on-config.patch

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Patch "bpf: reject stores into ctx via st and xadd" has been added to the 4.4-stable tree
  2018-01-30  2:37 ` [PATCH stable 4.4 9/9] bpf: reject stores into ctx via st and xadd Daniel Borkmann
@ 2018-02-01  8:07   ` gregkh
  0 siblings, 0 replies; 22+ messages in thread
From: gregkh @ 2018-02-01  8:07 UTC (permalink / raw)
  To: daniel, ast, gregkh; +Cc: stable, stable-commits


This is a note to let you know that I've just added the patch titled

    bpf: reject stores into ctx via st and xadd

to the 4.4-stable tree which can be found at:
    http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=summary

The filename of the patch is:
     bpf-reject-stores-into-ctx-via-st-and-xadd.patch
and it can be found in the queue-4.4 subdirectory.

If you, or anyone else, feels it should not be added to the stable tree,
please let <stable@vger.kernel.org> know about it.


>From foo@baz Thu Feb  1 09:05:44 CET 2018
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Tue, 30 Jan 2018 03:37:46 +0100
Subject: bpf: reject stores into ctx via st and xadd
To: gregkh@linuxfoundation.org
Cc: ast@kernel.org, daniel@iogearbox.net, stable@vger.kernel.org
Message-ID: <7d49693fcf1d0f23f0f14e8da18acfe03da9fc18.1517279268.git.daniel@iogearbox.net>

From: Daniel Borkmann <daniel@iogearbox.net>

[ upstream commit f37a8cb84cce18762e8f86a70bd6a49a66ab964c ]

Alexei found that the verifier does not reject stores into the context
via BPF_ST instead of BPF_STX. And while looking at it, we
also should not allow the XADD variant of BPF_STX.

The context rewriter only assumes BPF_LDX_MEM- or
BPF_STX_MEM-type operations, thus reject anything other than
that so that the assumptions in the rewriter properly hold. Add
test cases for this to the BPF selftests as well.

Fixes: d691f9e8d440 ("bpf: allow programs to write to certain skb fields")
Reported-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 kernel/bpf/verifier.c |   19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -683,6 +683,13 @@ static bool is_pointer_value(struct veri
 	}
 }
 
+static bool is_ctx_reg(struct verifier_env *env, int regno)
+{
+	const struct reg_state *reg = &env->cur_state.regs[regno];
+
+	return reg->type == PTR_TO_CTX;
+}
+
 /* check whether memory at (regno + off) is accessible for t = (read | write)
  * if t==write, value_regno is a register which value is stored into memory
  * if t==read, value_regno is a register which will receive the value from memory
@@ -779,6 +786,12 @@ static int check_xadd(struct verifier_en
 		return -EACCES;
 	}
 
+	if (is_ctx_reg(env, insn->dst_reg)) {
+		verbose("BPF_XADD stores into R%d context is not allowed\n",
+			insn->dst_reg);
+		return -EACCES;
+	}
+
 	/* check whether atomic_add can read the memory */
 	err = check_mem_access(env, insn->dst_reg, insn->off,
 			       BPF_SIZE(insn->code), BPF_READ, -1);
@@ -1909,6 +1922,12 @@ static int do_check(struct verifier_env
 			if (err)
 				return err;
 
+			if (is_ctx_reg(env, insn->dst_reg)) {
+				verbose("BPF_ST stores into R%d context is not allowed\n",
+					insn->dst_reg);
+				return -EACCES;
+			}
+
 			/* check that memory (dst_reg + off) is writeable */
 			err = check_mem_access(env, insn->dst_reg, insn->off,
 					       BPF_SIZE(insn->code), BPF_WRITE,

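For reference, two instruction forms (insn macros from include/linux/filter.h)
that the new is_ctx_reg() checks reject when the destination register still
holds the context pointer (r1 on entry); the offset and immediate here are
illustrative only:

	BPF_ST_MEM(BPF_W, BPF_REG_1, 0, 42),          /* BPF_ST into ctx  -> -EACCES */
	BPF_STX_XADD(BPF_W, BPF_REG_1, BPF_REG_2, 0), /* BPF_XADD on ctx  -> -EACCES */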

Patches currently in stable-queue which might be from daniel@iogearbox.net are

queue-4.4/bpf-fix-branch-pruning-logic.patch
queue-4.4/bpf-avoid-false-sharing-of-map-refcount-with-max_entries.patch
queue-4.4/x86-bpf_jit-small-optimization-in-emit_bpf_tail_call.patch
queue-4.4/bpf-reject-stores-into-ctx-via-st-and-xadd.patch
queue-4.4/bpf-fix-32-bit-divide-by-zero.patch
queue-4.4/bpf-fix-bpf_tail_call-x64-jit.patch
queue-4.4/bpf-arsh-is-not-supported-in-32-bit-alu-thus-reject-it.patch
queue-4.4/bpf-fix-divides-by-zero.patch
queue-4.4/bpf-introduce-bpf_jit_always_on-config.patch

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Patch "x86: bpf_jit: small optimization in emit_bpf_tail_call()" has been added to the 4.4-stable tree
  2018-01-30  2:37 ` [PATCH stable 4.4 2/9] x86: bpf_jit: small optimization in emit_bpf_tail_call() Daniel Borkmann
@ 2018-02-01  8:07   ` gregkh
  0 siblings, 0 replies; 22+ messages in thread
From: gregkh @ 2018-02-01  8:07 UTC (permalink / raw)
  To: daniel, ast, davem, edumazet, gregkh; +Cc: stable, stable-commits


This is a note to let you know that I've just added the patch titled

    x86: bpf_jit: small optimization in emit_bpf_tail_call()

to the 4.4-stable tree which can be found at:
    http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=summary

The filename of the patch is:
     x86-bpf_jit-small-optimization-in-emit_bpf_tail_call.patch
and it can be found in the queue-4.4 subdirectory.

If you, or anyone else, feels it should not be added to the stable tree,
please let <stable@vger.kernel.org> know about it.


>From foo@baz Thu Feb  1 09:05:44 CET 2018
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Tue, 30 Jan 2018 03:37:39 +0100
Subject: x86: bpf_jit: small optimization in emit_bpf_tail_call()
To: gregkh@linuxfoundation.org
Cc: ast@kernel.org, daniel@iogearbox.net, stable@vger.kernel.org, Eric Dumazet <edumazet@google.com>, "David S . Miller" <davem@davemloft.net>
Message-ID: <e0ee628ad21177055e3bad4cea8995f7d66b11c0.1517279268.git.daniel@iogearbox.net>

From: Eric Dumazet <edumazet@google.com>

[ upstream commit 84ccac6e7854ebbfb56d2fc6d5bef9be49bb304c ]

Saves 4 bytes replacing following instructions :

lea rax, [rsi + rdx * 8 + offsetof(...)]
mov rax, qword ptr [rax]
cmp rax, 0

by :

mov rax, [rsi + rdx * 8 + offsetof(...)]
test rax, rax

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/net/bpf_jit_comp.c |    9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -269,7 +269,7 @@ static void emit_bpf_tail_call(u8 **ppro
 	EMIT4(0x48, 0x8B, 0x46,                   /* mov rax, qword ptr [rsi + 16] */
 	      offsetof(struct bpf_array, map.max_entries));
 	EMIT3(0x48, 0x39, 0xD0);                  /* cmp rax, rdx */
-#define OFFSET1 47 /* number of bytes to jump */
+#define OFFSET1 43 /* number of bytes to jump */
 	EMIT2(X86_JBE, OFFSET1);                  /* jbe out */
 	label1 = cnt;
 
@@ -278,21 +278,20 @@ static void emit_bpf_tail_call(u8 **ppro
 	 */
 	EMIT2_off32(0x8B, 0x85, -STACKSIZE + 36); /* mov eax, dword ptr [rbp - 516] */
 	EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT);     /* cmp eax, MAX_TAIL_CALL_CNT */
-#define OFFSET2 36
+#define OFFSET2 32
 	EMIT2(X86_JA, OFFSET2);                   /* ja out */
 	label2 = cnt;
 	EMIT3(0x83, 0xC0, 0x01);                  /* add eax, 1 */
 	EMIT2_off32(0x89, 0x85, -STACKSIZE + 36); /* mov dword ptr [rbp - 516], eax */
 
 	/* prog = array->ptrs[index]; */
-	EMIT4_off32(0x48, 0x8D, 0x84, 0xD6,       /* lea rax, [rsi + rdx * 8 + offsetof(...)] */
+	EMIT4_off32(0x48, 0x8B, 0x84, 0xD6,       /* mov rax, [rsi + rdx * 8 + offsetof(...)] */
 		    offsetof(struct bpf_array, ptrs));
-	EMIT3(0x48, 0x8B, 0x00);                  /* mov rax, qword ptr [rax] */
 
 	/* if (prog == NULL)
 	 *   goto out;
 	 */
-	EMIT4(0x48, 0x83, 0xF8, 0x00);            /* cmp rax, 0 */
+	EMIT3(0x48, 0x85, 0xC0);		  /* test rax,rax */
 #define OFFSET3 10
 	EMIT2(X86_JE, OFFSET3);                   /* je out */
 	label3 = cnt;

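A quick byte-count check of the jump offsets in the hunk above (widths read off
the EMIT macros, so treat the arithmetic as an illustration): the replaced
sequence is an 8-byte lea, a 3-byte mov and a 4-byte cmp (15 bytes), while the
new one is an 8-byte mov plus a 3-byte test (11 bytes). Everything the taken
branches jump over therefore shrinks by 4 bytes, which is why OFFSET1 goes from
47 to 43 and OFFSET2 from 36 to 32.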

Patches currently in stable-queue which might be from daniel@iogearbox.net are

queue-4.4/bpf-fix-branch-pruning-logic.patch
queue-4.4/bpf-avoid-false-sharing-of-map-refcount-with-max_entries.patch
queue-4.4/x86-bpf_jit-small-optimization-in-emit_bpf_tail_call.patch
queue-4.4/bpf-reject-stores-into-ctx-via-st-and-xadd.patch
queue-4.4/bpf-fix-32-bit-divide-by-zero.patch
queue-4.4/bpf-fix-bpf_tail_call-x64-jit.patch
queue-4.4/bpf-arsh-is-not-supported-in-32-bit-alu-thus-reject-it.patch
queue-4.4/bpf-fix-divides-by-zero.patch
queue-4.4/bpf-introduce-bpf_jit_always_on-config.patch

^ permalink raw reply	[flat|nested] 22+ messages in thread

end of thread, other threads:[~2018-02-01  8:07 UTC | newest]

Thread overview: 22+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-01-30  2:37 [PATCH stable 4.4 0/9] BPF stable patches Daniel Borkmann
2018-01-30  2:37 ` [PATCH stable 4.4 1/9] bpf: fix branch pruning logic Daniel Borkmann
2018-02-01  8:07   ` Patch "bpf: fix branch pruning logic" has been added to the 4.4-stable tree gregkh
2018-01-30  2:37 ` [PATCH stable 4.4 2/9] x86: bpf_jit: small optimization in emit_bpf_tail_call() Daniel Borkmann
2018-02-01  8:07   ` Patch "x86: bpf_jit: small optimization in emit_bpf_tail_call()" has been added to the 4.4-stable tree gregkh
2018-01-30  2:37 ` [PATCH stable 4.4 3/9] bpf: fix bpf_tail_call() x64 JIT Daniel Borkmann
2018-02-01  8:07   ` Patch "bpf: fix bpf_tail_call() x64 JIT" has been added to the 4.4-stable tree gregkh
2018-01-30  2:37 ` [PATCH stable 4.4 4/9] bpf: introduce BPF_JIT_ALWAYS_ON config Daniel Borkmann
2018-02-01  8:07   ` Patch "bpf: introduce BPF_JIT_ALWAYS_ON config" has been added to the 4.4-stable tree gregkh
2018-01-30  2:37 ` [PATCH stable 4.4 5/9] bpf: arsh is not supported in 32 bit alu thus reject it Daniel Borkmann
2018-02-01  8:07   ` Patch "bpf: arsh is not supported in 32 bit alu thus reject it" has been added to the 4.4-stable tree gregkh
2018-01-30  2:37 ` [PATCH stable 4.4 6/9] bpf: avoid false sharing of map refcount with max_entries Daniel Borkmann
2018-02-01  8:07   ` Patch "bpf: avoid false sharing of map refcount with max_entries" has been added to the 4.4-stable tree gregkh
2018-01-30  2:37 ` [PATCH stable 4.4 7/9] bpf: fix divides by zero Daniel Borkmann
2018-02-01  8:07   ` Patch "bpf: fix divides by zero" has been added to the 4.4-stable tree gregkh
2018-01-30  2:37 ` [PATCH stable 4.4 8/9] bpf: fix 32-bit divide by zero Daniel Borkmann
2018-02-01  8:07   ` Patch "bpf: fix 32-bit divide by zero" has been added to the 4.4-stable tree gregkh
2018-01-30  2:37 ` [PATCH stable 4.4 9/9] bpf: reject stores into ctx via st and xadd Daniel Borkmann
2018-02-01  8:07   ` Patch "bpf: reject stores into ctx via st and xadd" has been added to the 4.4-stable tree gregkh
2018-01-30  6:30 ` [PATCH stable 4.4 0/9] BPF stable patches Greg KH
2018-01-30 10:29   ` Daniel Borkmann
2018-02-01  8:07     ` Greg KH
