* [REGRESSION][PATCH] bpf_jit drops the ball on indirect negative mem references
@ 2012-03-28 19:15 Jan Seiffert
2012-03-28 20:05 ` Eric Dumazet
2012-03-28 20:58 ` David Miller
0 siblings, 2 replies; 14+ messages in thread
From: Jan Seiffert @ 2012-03-28 19:15 UTC (permalink / raw)
To: netdev; +Cc: linux-kernel, David S. Miller, Matt Evans, Eric Dumazet
[-- Attachment #1: Type: text/plain, Size: 2386 bytes --]
Consider the following test program:
#include <stdio.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <pcap-bpf.h>
#define die(x) do {perror(x); return 1;} while (0)
struct bpf_insn udp_filter[] = {
/* 0 */ BPF_STMT(BPF_LDX|BPF_W|BPF_IMM, -1048576+(0)), /* leax net[0] */
/* 1 */ BPF_STMT(BPF_LD|BPF_B|BPF_IND, 0), /* ldb [x+0] */
/* 2 */ BPF_STMT(BPF_RET|BPF_A, 0), /* ret a */
};
int main(int argc, char *argv[])
{
char buf[512];
struct sockaddr_in addr;
struct bpf_program prg;
socklen_t addr_s;
ssize_t res;
int fd;
addr.sin_family = AF_INET;
addr.sin_port = htons(5000);
addr.sin_addr.s_addr = 0;
addr_s = sizeof(addr);
prg.bf_len = sizeof(udp_filter)/sizeof(udp_filter[0]);
prg.bf_insns = udp_filter;
if(-1 == (fd = socket(AF_INET, SOCK_DGRAM, 0)))
die("socket");
if(-1 == bind(fd, (struct sockaddr *)&addr, sizeof(addr)))
die("bind");
if(-1 == setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &prg, sizeof(prg)))
die("setsockopt");
res = recvfrom(fd, buf, sizeof(buf), 0, (struct sockaddr *)&addr, &addr_s);
if(res != -1)
printf("packet received: %zi bytes\n", res);
else
die("recvfrom");
return 0;
}
when used with the bpf jit disabled works:
console 1 $ ./bpf
console 2 $ echo "hello" | nc -u localhost 5000
console 1: packet received: 6 bytes
When the bpf jit gets enabled (echo 100 >
/proc/sys/net/core/bpf_jit_enable) the same program stops working:
console 1 $ ./bpf
console 2 $ echo "hello" | nc -u localhost 5000
console 1:
The reason is that both jits (x86 and powerpc) do not handle negative
memory references like SKF_NET_OFF or SKF_LL_OFF, only the simple
ancillary data references are supported (by mapping to special
instructions).
In the case of an absolute reference, the jit aborts the translation
if a negative reference is seen, also a negative k on the indirect
load aborts the translation, but if X is negative to begin with, only
the error handler is reached at runtime which drops the whole packet.
I propose the following patch to fix this situation.
Lightly tested on x86, but the powerpc asm part is probably wrong.
Signed-off-by: Jan Seiffert <kaffeemonster@googlemail.com>
--
I know this is not a 100% submission, please refrain from shooting me,
I'm not really set up for professional kernel development, I just want
this fixed.
Greetings
Jan
[-- Attachment #2: bpf_neg.diff --]
[-- Type: application/octet-stream, Size: 10559 bytes --]
diff --git a/arch/powerpc/net/bpf_jit_64.S b/arch/powerpc/net/bpf_jit_64.S
index ff4506e..87dc6e4 100644
--- a/arch/powerpc/net/bpf_jit_64.S
+++ b/arch/powerpc/net/bpf_jit_64.S
@@ -31,14 +31,11 @@
* then branch directly to slow_path_XXX if required. (In fact, could
* load a spare GPR with the address of slow_path_generic and pass size
* as an argument, making the call site a mtlr, li and bllr.)
- *
- * Technically, the "is addr < 0" check is unnecessary & slowing down
- * the ABS path, as it's statically checked on generation.
*/
.globl sk_load_word
sk_load_word:
cmpdi r_addr, 0
- blt bpf_error
+ blt bpf_slow_path_neg_word
/* Are we accessing past headlen? */
subi r_scratch1, r_HL, 4
cmpd r_scratch1, r_addr
@@ -51,7 +48,7 @@ sk_load_word:
.globl sk_load_half
sk_load_half:
cmpdi r_addr, 0
- blt bpf_error
+ blt bpf_slow_path_neg_half
subi r_scratch1, r_HL, 2
cmpd r_scratch1, r_addr
blt bpf_slow_path_half
@@ -61,7 +58,7 @@ sk_load_half:
.globl sk_load_byte
sk_load_byte:
cmpdi r_addr, 0
- blt bpf_error
+ blt bpf_slow_path_neg_byte
cmpd r_HL, r_addr
ble bpf_slow_path_byte
lbzx r_A, r_D, r_addr
@@ -69,16 +66,22 @@ sk_load_byte:
/*
* BPF_S_LDX_B_MSH: ldxb 4*([offset]&0xf)
- * r_addr is the offset value, already known positive
+ * r_addr is the offset value
*/
.globl sk_load_byte_msh
sk_load_byte_msh:
+ cmpdi r_addr, 0
+ blt bpf_slow_path_neg_byte_msh
cmpd r_HL, r_addr
ble bpf_slow_path_byte_msh
lbzx r_X, r_D, r_addr
rlwinm r_X, r_X, 2, 32-4-2, 31-2
blr
+bpf_error_slow:
+ /* fabricate a cr0 = lt */
+ li r_scratch1, -1
+ cmpdi r_scratch1, 0
bpf_error:
/* Entered with cr0 = lt */
li r3, 0
@@ -136,3 +139,57 @@ bpf_slow_path_byte_msh:
lbz r_X, BPF_PPC_STACK_BASIC+(2*8)(r1)
rlwinm r_X, r_X, 2, 32-4-2, 31-2
blr
+
+/* Call out to bpf_internal_load_pointer_neg_helper:
+ * We'll need to back up our volatile regs first; we have
+ * local variable space at r1+(BPF_PPC_STACK_BASIC).
+ * Allocate a new stack frame here to remain ABI-compliant in
+ * stashing LR.
+ */
+#define bpf_slow_path_neg_common(SIZE) \
+ lis r_scratch1,-32; /* SKF_LL_OFF */ \
+ cmpd r_addr, r_scratch1; /* addr < SKF_* */ \
+ blt bpf_error; /* cr0 = LT */ \
+ mflr r0; \
+ std r0, 16(r1); \
+ /* R3 goes in parameter space of caller's frame */ \
+ std r_skb, (BPF_PPC_STACKFRAME+48)(r1); \
+ std r_A, (BPF_PPC_STACK_BASIC+(0*8))(r1); \
+ std r_X, (BPF_PPC_STACK_BASIC+(1*8))(r1); \
+ stdu r1, -BPF_PPC_SLOWPATH_FRAME(r1); \
+ /* R3 = r_skb, as passed */ \
+ mr r4, r_addr; \
+ li r5, SIZE; \
+ bl bpf_internal_load_pointer_neg_helper; \
+ /* R3 != 0 on success */ \
+ addi r1, r1, BPF_PPC_SLOWPATH_FRAME; \
+ ld r0, 16(r1); \
+ ld r_A, (BPF_PPC_STACK_BASIC+(0*8))(r1); \
+ ld r_X, (BPF_PPC_STACK_BASIC+(1*8))(r1); \
+ mtlr r0; \
+ cmpldi r3, 0; \
+ beq bpf_error_slow; /* cr0 = EQ */ \
+ mr r_addr, r3; \
+ ld r_skb, (BPF_PPC_STACKFRAME+48)(r1); \
+ /* Great success! */
+
+bpf_slow_path_neg_word:
+ bpf_slow_path_neg_common(4)
+ lwz r_A, (r_addr)
+ blr
+
+bpf_slow_path_neg_half:
+ bpf_slow_path_neg_common(2)
+ lhz r_A, (r_addr)
+ blr
+
+bpf_slow_path_neg_byte:
+ bpf_slow_path_neg_common(1)
+ lbz r_A, (r_addr)
+ blr
+
+bpf_slow_path_neg_byte_msh:
+ bpf_slow_path_neg_common(1)
+ lbz r_X, (r_addr)
+ rlwinm r_X, r_X, 2, 32-4-2, 31-2
+ blr
diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
index 73619d3..0019f70 100644
--- a/arch/powerpc/net/bpf_jit_comp.c
+++ b/arch/powerpc/net/bpf_jit_comp.c
@@ -400,12 +400,10 @@ static int bpf_jit_build_body(struct sk_filter *fp, u32 *image,
func = sk_load_byte;
common_load:
/*
- * Load from [K]. Reference with the (negative)
- * SKF_NET_OFF/SKF_LL_OFF offsets is unsupported.
+ * Load from [K]. Negative offsets are tested for
+ * in the helper functions.
*/
ctx->seen |= SEEN_DATAREF;
- if ((int)K < 0)
- return -ENOTSUPP;
PPC_LI64(r_scratch1, func);
PPC_MTLR(r_scratch1);
PPC_LI32(r_addr, K);
@@ -429,7 +427,7 @@ static int bpf_jit_build_body(struct sk_filter *fp, u32 *image,
common_load_ind:
/*
* Load from [X + K]. Negative offsets are tested for
- * in the helper functions, and result in a 'ret 0'.
+ * in the helper functions.
*/
ctx->seen |= SEEN_DATAREF | SEEN_XREG;
PPC_LI64(r_scratch1, func);
@@ -443,12 +441,6 @@ static int bpf_jit_build_body(struct sk_filter *fp, u32 *image,
break;
case BPF_S_LDX_B_MSH:
- /*
- * x86 version drops packet (RET 0) when K<0, whereas
- * interpreter does allow K<0 (__load_pointer, special
- * ancillary data). common_load returns ENOTSUPP if K<0,
- * so we fall back to interpreter & filter works.
- */
func = sk_load_byte_msh;
goto common_load;
break;
diff --git a/arch/x86/net/bpf_jit.S b/arch/x86/net/bpf_jit.S
index 6687022..1201783 100644
--- a/arch/x86/net/bpf_jit.S
+++ b/arch/x86/net/bpf_jit.S
@@ -18,17 +18,13 @@
* r9d : hlen = skb->len - skb->data_len
*/
#define SKBDATA %r8
-
-sk_load_word_ind:
- .globl sk_load_word_ind
-
- add %ebx,%esi /* offset += X */
-# test %esi,%esi /* if (offset < 0) goto bpf_error; */
- js bpf_error
+#define SKF_MAX_NEG_OFF $(-0x200000) /* SKF_LL_OFF from filter.h */
sk_load_word:
.globl sk_load_word
+ test %esi,%esi
+ js bpf_slow_path_word_neg
mov %r9d,%eax # hlen
sub %esi,%eax # hlen - offset
cmp $3,%eax
@@ -37,16 +33,11 @@ sk_load_word:
bswap %eax /* ntohl() */
ret
-
-sk_load_half_ind:
- .globl sk_load_half_ind
-
- add %ebx,%esi /* offset += X */
- js bpf_error
-
sk_load_half:
.globl sk_load_half
+ test %esi,%esi
+ js bpf_slow_path_half_neg
mov %r9d,%eax
sub %esi,%eax # hlen - offset
cmp $1,%eax
@@ -55,14 +46,11 @@ sk_load_half:
rol $8,%ax # ntohs()
ret
-sk_load_byte_ind:
- .globl sk_load_byte_ind
- add %ebx,%esi /* offset += X */
- js bpf_error
-
sk_load_byte:
.globl sk_load_byte
+ test %esi,%esi
+ js bpf_slow_path_byte_neg
cmp %esi,%r9d /* if (offset >= hlen) goto bpf_slow_path_byte */
jle bpf_slow_path_byte
movzbl (SKBDATA,%rsi),%eax
@@ -73,10 +61,12 @@ sk_load_byte:
*
* Implements BPF_S_LDX_B_MSH : ldxb 4*([offset]&0xf)
* Must preserve A accumulator (%eax)
- * Inputs : %esi is the offset value, already known positive
+ * Inputs : %esi is the offset value
*/
ENTRY(sk_load_byte_msh)
CFI_STARTPROC
+ test %esi,%esi
+ js bpf_slow_path_byte_msh_neg
cmp %esi,%r9d /* if (offset >= hlen) goto bpf_slow_path_byte_msh */
jle bpf_slow_path_byte_msh
movzbl (SKBDATA,%rsi),%ebx
@@ -138,3 +128,45 @@ bpf_slow_path_byte_msh:
shl $2,%al
xchg %eax,%ebx
ret
+
+#define bpf_slow_path_neg_common(SIZE) \
+ cmp SKF_MAX_NEG_OFF, %esi; /* test range */ \
+ jl bpf_error; /* offset lower -> error */ \
+ push %rdi; /* save skb */ \
+ push %r9; \
+ push SKBDATA; \
+/* rsi already has offset */ \
+ mov $SIZE,%ecx; /* size */ \
+ call bpf_internal_load_pointer_neg_helper; \
+ test %rax,%rax; \
+ pop SKBDATA; \
+ pop %r9; \
+ pop %rdi; \
+ jz bpf_error
+
+bpf_slow_path_word_neg:
+ bpf_slow_path_neg_common(4)
+ mov (%rax), %eax
+ bswap %eax
+ ret
+
+bpf_slow_path_half_neg:
+ bpf_slow_path_neg_common(2)
+ mov (%rax),%ax
+ rol $8,%ax
+ movzwl %ax,%eax
+ ret
+
+bpf_slow_path_byte_neg:
+ bpf_slow_path_neg_common(1)
+ movzbl (%rax), %eax
+ ret
+
+bpf_slow_path_byte_msh_neg:
+ xchg %eax,%ebx /* dont lose A , X is about to be scratched */
+ bpf_slow_path_neg_common(1)
+ movzbl (%rax),%eax
+ and $15,%al
+ shl $2,%al
+ xchg %eax,%ebx
+ ret
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 5671752..3c4a454 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -30,7 +30,6 @@ int bpf_jit_enable __read_mostly;
* assembly code in arch/x86/net/bpf_jit.S
*/
extern u8 sk_load_word[], sk_load_half[], sk_load_byte[], sk_load_byte_msh[];
-extern u8 sk_load_word_ind[], sk_load_half_ind[], sk_load_byte_ind[];
static inline u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len)
{
@@ -475,10 +474,6 @@ void bpf_jit_compile(struct sk_filter *fp)
case BPF_S_LD_W_ABS:
func = sk_load_word;
common_load: seen |= SEEN_DATAREF;
- if ((int)K < 0) {
- /* Abort the JIT because __load_pointer() is needed. */
- goto out;
- }
t_offset = func - (image + addrs[i]);
EMIT1_off32(0xbe, K); /* mov imm32,%esi */
EMIT1_off32(0xe8, t_offset); /* call */
@@ -490,27 +485,28 @@ common_load: seen |= SEEN_DATAREF;
func = sk_load_byte;
goto common_load;
case BPF_S_LDX_B_MSH:
- if ((int)K < 0) {
- /* Abort the JIT because __load_pointer() is needed. */
- goto out;
- }
seen |= SEEN_DATAREF | SEEN_XREG;
t_offset = sk_load_byte_msh - (image + addrs[i]);
EMIT1_off32(0xbe, K); /* mov imm32,%esi */
EMIT1_off32(0xe8, t_offset); /* call sk_load_byte_msh */
break;
case BPF_S_LD_W_IND:
- func = sk_load_word_ind;
+ func = sk_load_word;
common_load_ind: seen |= SEEN_DATAREF | SEEN_XREG;
t_offset = func - (image + addrs[i]);
- EMIT1_off32(0xbe, K); /* mov imm32,%esi */
+ if (K) {
+ EMIT2(0x8d, 0xb3); /* lea imm32(%rbx),%esi */
+ EMIT(K, 4);
+ } else {
+ EMIT2(0x89,0xde); /* mov %ebx,%esi */
+ }
EMIT1_off32(0xe8, t_offset); /* call sk_load_xxx_ind */
break;
case BPF_S_LD_H_IND:
- func = sk_load_half_ind;
+ func = sk_load_half;
goto common_load_ind;
case BPF_S_LD_B_IND:
- func = sk_load_byte_ind;
+ func = sk_load_byte;
goto common_load_ind;
case BPF_S_JMP_JA:
t_offset = addrs[i + K] - addrs[i];
diff --git a/net/core/filter.c b/net/core/filter.c
index 5dea452..04ca613 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -41,7 +41,7 @@
#include <linux/ratelimit.h>
/* No hurry in this branch */
-static void *__load_pointer(const struct sk_buff *skb, int k, unsigned int size)
+void *bpf_internal_load_pointer_neg_helper(const struct sk_buff *skb, int k, unsigned int size)
{
u8 *ptr = NULL;
@@ -54,13 +54,14 @@ static void *__load_pointer(const struct sk_buff *skb, int k, unsigned int size)
return ptr;
return NULL;
}
+EXPORT_SYMBOL(bpf_internal_load_pointer_neg_helper);
static inline void *load_pointer(const struct sk_buff *skb, int k,
unsigned int size, void *buffer)
{
if (k >= 0)
return skb_header_pointer(skb, k, size, buffer);
- return __load_pointer(skb, k, size);
+ return bpf_internal_load_pointer_neg_helper(skb, k, size);
}
/**
^ permalink raw reply related [flat|nested] 14+ messages in thread
* Re: [REGRESSION][PATCH] bpf_jit drops the ball on indirect negative mem references
2012-03-28 19:15 [REGRESSION][PATCH] bpf_jit drops the ball on indirect negative mem references Jan Seiffert
@ 2012-03-28 20:05 ` Eric Dumazet
2012-03-28 20:26 ` Jan Seiffert
2012-03-28 20:58 ` David Miller
1 sibling, 1 reply; 14+ messages in thread
From: Eric Dumazet @ 2012-03-28 20:05 UTC (permalink / raw)
To: Jan Seiffert; +Cc: netdev, linux-kernel, David S. Miller, Matt Evans
On Wed, 2012-03-28 at 21:15 +0200, Jan Seiffert wrote:
> Consider the following test program:
>
> #include <stdio.h>
> #include <sys/types.h>
> #include <sys/socket.h>
> #include <netinet/in.h>
> #include <pcap-bpf.h>
>
> #define die(x) do {perror(x); return 1;} while (0)
> struct bpf_insn udp_filter[] = {
> /* 0 */ BPF_STMT(BPF_LDX|BPF_W|BPF_IMM, -1048576+(0)), /* leax net[0] */
> /* 1 */ BPF_STMT(BPF_LD|BPF_B|BPF_IND, 0), /* ldb [x+0] */
> /* 2 */ BPF_STMT(BPF_RET|BPF_A, 0), /* ret a */
> };
When this point was raised some weeks ago, we wanted to see a _real_ use
of negative mem reference.
You provide a test program but what this filter is supposed to do
exactly ?
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [REGRESSION][PATCH] bpf_jit drops the ball on indirect negative mem references
2012-03-28 20:05 ` Eric Dumazet
@ 2012-03-28 20:26 ` Jan Seiffert
2012-03-28 20:39 ` Eric Dumazet
0 siblings, 1 reply; 14+ messages in thread
From: Jan Seiffert @ 2012-03-28 20:26 UTC (permalink / raw)
To: Eric Dumazet; +Cc: netdev, linux-kernel, David S. Miller, Matt Evans
2012/3/28 Eric Dumazet <eric.dumazet@gmail.com>:
> On Wed, 2012-03-28 at 21:15 +0200, Jan Seiffert wrote:
>> Consider the following test program:
>>
>> #include <stdio.h>
>> #include <sys/types.h>
>> #include <sys/socket.h>
>> #include <netinet/in.h>
>> #include <pcap-bpf.h>
>>
>> #define die(x) do {perror(x); return 1;} while (0)
>> struct bpf_insn udp_filter[] = {
>> /* 0 */ BPF_STMT(BPF_LDX|BPF_W|BPF_IMM, -1048576+(0)), /* leax net[0] */
>> /* 1 */ BPF_STMT(BPF_LD|BPF_B|BPF_IND, 0), /* ldb [x+0] */
>> /* 2 */ BPF_STMT(BPF_RET|BPF_A, 0), /* ret a */
>> };
>
> When this point was raised some weeks ago, we wanted to see a _real_ use
> of negative mem reference.
>
> You provide a test program but what this filter is supposed to do
> exactly ?
>
Say you have a UDP socket, and you want to filter for bogus source
addresses (drop already in kernel to save the context switch).
To have only one bpf program for ipv4 and ipv6 (you have to check
the same bogus v4 addresses in mapped space), there is a point where
it is elegant to have a negative offset saved in the X register.
This is how i found the Bug.
But use case or not, the jits behavior is different than the interpreter.
Example (not the complete program):
struct bpf_insn UDPPValid[] = {
/* 0 */ BPF_STMT(BPF_LDX|BPF_W|BPF_IMM, -1048576+(12)),
/* 1 */ BPF_STMT(BPF_LD|BPF_B|BPF_ABS, -1048576+(0)),
/* 2 */ BPF_STMT(BPF_ALU|BPF_AND|BPF_K, 0xf0),
/* 3 */ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, 0x40, 22 - 4, 0),
/* 4 */ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, 0x60, 5 - 5, 39 - 5),
/* 5 */ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, 0, 12 - 6, 0),
/* 6 */ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, 0x20010DB8, 39 - 7, 0),
/* 7 */ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, 0x20010002, 18 - 8, 0),
/* 8 */ BPF_STMT(BPF_ALU|BPF_AND|BPF_K, 0xfffffff0),
/* 9 */ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, 0x20010010, 39 - 10, 0),
/* 10 */ BPF_STMT(BPF_ALU|BPF_AND|BPF_K, 0xff000000),
/* 11 */ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, 0xff000000, 39 - 12, 37 - 12),
/* 12 */ BPF_STMT(BPF_LD|BPF_W|BPF_ABS, -1048576+(12)),
/* 13 */ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, 0, 0, 37 - 14),
/* 14 */ BPF_STMT(BPF_LD|BPF_W|BPF_ABS, -1048576+(16)),
/* 15 */ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, 0xffff, 21 - 16, 0),
/* 16 */ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, 0x0064FF9B, 21 - 17, 0),
/* 17 */ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, 0, 39 - 18, 37 -
18),
/* 18 */ BPF_STMT(BPF_LD|BPF_W|BPF_ABS, -1048576+(12)),
/* 19 */ BPF_STMT(BPF_ALU|BPF_AND|BPF_K, 0xffff0000),
/* 20 */ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, 0, 39 - 21, 37 -
21),
/* 21 */ BPF_STMT(BPF_LDX|BPF_W|BPF_IMM, -1048576+(20)),
/* 22 */ BPF_STMT(BPF_LD|BPF_W|BPF_IND, 0),
/* 23 */ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, 0xffffffff, 39 - 24,
0),
/* 24 */ BPF_STMT(BPF_ALU|BPF_AND|BPF_K, 0xffffff00),
/* 25 */ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, 0xC0000000, 39 - 26,
0),
/* 26 */ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, 0xC0000200, 39 - 27,
0),
/* 27 */ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, 0xC6336400, 39 - 28,
0),
/* 28 */ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, 0xCB007100, 39 - 29,
0),
/* 29 */ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, 0xC0586300, 39 - 30,
0),
/* 30 */ BPF_STMT(BPF_ALU|BPF_AND|BPF_K, 0xfffe0000),
/* 31 */ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, 0xC6120000, 39 - 32,
0),
/* 32 */ BPF_STMT(BPF_ALU|BPF_AND|BPF_K, 0xff000000),
/* 33 */ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, 0, 39 - 34, 0),
/* 34 */ BPF_STMT(BPF_ALU|BPF_AND|BPF_K, 0xf0000000),
/* 35 */ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, 0xE0000000, 39 - 36,
0),
/* 36 */ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, 0xF0000000, 39 - 37,
0),
/* 37 */ BPF_STMT(BPF_LD|BPF_W|BPF_LEN, 0),
/* 38 */ BPF_STMT(BPF_RET|BPF_A, 0),
/* 39 */ BPF_STMT(BPF_RET|BPF_K, 0),
};
>
>
Greetings
Jan
--
˙qɐɥ ʇɟnɐʞǝƃ ʎɐqǝ ıǝq ɹnʇɐʇsɐʇ ǝuıǝ ɹıɯ ɥɔı sɐp lɐɯ ǝʇzʇǝl sɐp ʇsı sɐp
'ʇɯɯɐpɹǝʌ
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [REGRESSION][PATCH] bpf_jit drops the ball on indirect negative mem references
2012-03-28 20:26 ` Jan Seiffert
@ 2012-03-28 20:39 ` Eric Dumazet
2012-03-29 11:54 ` Jan Seiffert
0 siblings, 1 reply; 14+ messages in thread
From: Eric Dumazet @ 2012-03-28 20:39 UTC (permalink / raw)
To: Jan Seiffert; +Cc: netdev, linux-kernel, David S. Miller, Matt Evans
On Wed, 2012-03-28 at 22:26 +0200, Jan Seiffert wrote:
> 2012/3/28 Eric Dumazet <eric.dumazet@gmail.com>:
> > On Wed, 2012-03-28 at 21:15 +0200, Jan Seiffert wrote:
> >> Consider the following test program:
> >>
> >> #include <stdio.h>
> >> #include <sys/types.h>
> >> #include <sys/socket.h>
> >> #include <netinet/in.h>
> >> #include <pcap-bpf.h>
> >>
> >> #define die(x) do {perror(x); return 1;} while (0)
> >> struct bpf_insn udp_filter[] = {
> >> /* 0 */ BPF_STMT(BPF_LDX|BPF_W|BPF_IMM, -1048576+(0)), /* leax net[0] */
> >> /* 1 */ BPF_STMT(BPF_LD|BPF_B|BPF_IND, 0), /* ldb [x+0] */
> >> /* 2 */ BPF_STMT(BPF_RET|BPF_A, 0), /* ret a */
> >> };
> >
> > When this point was raised some weeks ago, we wanted to see a _real_ use
> > of negative mem reference.
> >
> > You provide a test program but what this filter is supposed to do
> > exactly ?
> >
>
> Say you have a UDP socket, and you want to filter for bogus source
> addresses (drop already in kernel to save the context switch).
> To have only one bpf program for ipv4 and ipv6 (you have to check
> the same bogus v4 addresses in mapped space), there is a point where
> it is elegant to have a negative offset saved in the X register.
Cool, thats a valid use, thanks.
Problem is you slow down the jit in its normal use, for a very specific
use.
Please rework your patch so that absolute loads of positive offsets
(known at compile time) dont have to test negative offsets at run time.
You add two instructions per load, and thats not good.
Something like :
sk_load_word:
.globl sk_load_word
test %esi,%esi
js bpf_slow_path_word_neg
sk_load_word_positive_offset:
.globl sk_load_word_positive_offset
mov %r9d,%eax # hlen
sub %esi,%eax # hlen - offset
cmp $3,%eax
...
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [REGRESSION][PATCH] bpf_jit drops the ball on indirect negative mem references
2012-03-28 19:15 [REGRESSION][PATCH] bpf_jit drops the ball on indirect negative mem references Jan Seiffert
2012-03-28 20:05 ` Eric Dumazet
@ 2012-03-28 20:58 ` David Miller
1 sibling, 0 replies; 14+ messages in thread
From: David Miller @ 2012-03-28 20:58 UTC (permalink / raw)
To: kaffeemonster; +Cc: netdev, linux-kernel, matt, eric.dumazet
Please do not post patches as octet-stream attachments, post them
inline so that:
1) People can more easily review and quote the code in replies.
2) It actually gets logged properly in patchwork.
Otherwise I can guarantee your patch will get less attention than
you would like.
Thanks.
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [REGRESSION][PATCH] bpf_jit drops the ball on indirect negative mem references
2012-03-28 20:39 ` Eric Dumazet
@ 2012-03-29 11:54 ` Jan Seiffert
2012-03-29 13:57 ` Eric Dumazet
2012-03-30 9:11 ` Eric Dumazet
0 siblings, 2 replies; 14+ messages in thread
From: Jan Seiffert @ 2012-03-29 11:54 UTC (permalink / raw)
To: netdev; +Cc: Eric Dumazet, linux-kernel, David S. Miller, Matt Evans
Eric Dumazet schrieb:
> On Wed, 2012-03-28 at 22:26 +0200, Jan Seiffert wrote:
> [snip]
>> Say you have a UDP socket, and you want to filter for bogus source
>> addresses (drop already in kernel to save the context switch).
>> To have only one bpf program for ipv4 and ipv6 (you have to check
>> the same bogus v4 addresses in mapped space), there is a point where
>> it is elegant to have a negative offset saved in the X register.
> Cool, thats a valid use, thanks.
>
>
> Problem is you slow down the jit in its normal use, for a very specific
> use.
>
> Please rework your patch so that absolute loads of positive offsets
> (known at compile time) dont have to test negative offsets at run time.
>
> You add two instructions per load, and thats not good.
>
> Something like :
>
> sk_load_word:
> .globl sk_load_word
>
> test %esi,%esi
> js bpf_slow_path_word_neg
>
> sk_load_word_positive_offset:
> .globl sk_load_word_positive_offset
>
> mov %r9d,%eax # hlen
> sub %esi,%eax # hlen - offset
> cmp $3,%eax
>
> ...
>
>
>
Ok, to keep the ball rolling here is a V2 with the changes you suggested:
Consider the following test program:
#include <stdio.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <pcap-bpf.h>
#define die(x) do {perror(x); return 1;} while (0)
struct bpf_insn udp_filter[] = {
/* 0 */ BPF_STMT(BPF_LDX|BPF_W|BPF_IMM, -1048576+(0)), /* leax net[0] */
/* 1 */ BPF_STMT(BPF_LD|BPF_B|BPF_IND, 0), /* ldb [x+0] */
/* 2 */ BPF_STMT(BPF_RET|BPF_A, 0), /* ret a */
};
int main(int argc, char *argv[])
{
char buf[512];
struct sockaddr_in addr;
struct bpf_program prg;
socklen_t addr_s;
ssize_t res;
int fd;
addr.sin_family = AF_INET;
addr.sin_port = htons(5000);
addr.sin_addr.s_addr = 0;
addr_s = sizeof(addr);
prg.bf_len = sizeof(udp_filter)/sizeof(udp_filter[0]);
prg.bf_insns = udp_filter;
if(-1 == (fd = socket(AF_INET, SOCK_DGRAM, 0)))
die("socket");
if(-1 == bind(fd, (struct sockaddr *)&addr, sizeof(addr)))
die("bind");
if(-1 == setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &prg, sizeof(prg)))
die("setsockopt");
res = recvfrom(fd, buf, sizeof(buf), 0, (struct sockaddr *)&addr, &addr_s);
if(res != -1)
printf("packet received: %zi bytes\n", res);
else
die("recvfrom");
return 0;
}
when used with the bpf jit disabled works:
console 1 $ ./bpf
console 2 $ echo "hello" | nc -u localhost 5000
console 1: packet received: 6 bytes
When the bpf jit gets enabled (echo 100 >
/proc/sys/net/core/bpf_jit_enable) the same program stops working:
console 1 $ ./bpf
console 2 $ echo "hello" | nc -u localhost 5000
console 1:
The reason is that both jits (x86 and powerpc) do not handle negative
memory references like SKF_NET_OFF or SKF_LL_OFF, only the simple
ancillary data references are supported (by mapping to special
instructions).
In the case of an absolute reference, the jit aborts the translation
if a negative reference is seen, also a negative k on the indirect
load aborts the translation, but if X is negative to begin with, only
the error handler is reached at runtime which drops the whole packet.
I propose the following patch to fix this situation.
Lightly tested on x86, but the powerpc asm part is probably wrong.
Signed-off-by: Jan Seiffert <kaffeemonster@googlemail.com>
diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h
index af1ab5e..e9b57b3 100644
--- a/arch/powerpc/net/bpf_jit.h
+++ b/arch/powerpc/net/bpf_jit.h
@@ -49,6 +49,10 @@
* Assembly helpers from arch/powerpc/net/bpf_jit.S:
*/
extern u8 sk_load_word[], sk_load_half[], sk_load_byte[], sk_load_byte_msh[];
+extern u8 sk_load_word_positive_offset[], sk_load_half_positive_offset[];
+extern u8 sk_load_byte_positive_offset[], sk_load_byte_msh_positive_offset[];
+extern u8 sk_load_word_negative_offset[], sk_load_half_negative_offset[];
+extern u8 sk_load_byte_negative_offset[], sk_load_byte_msh_negative_offset[];
#define FUNCTION_DESCR_SIZE 24
diff --git a/arch/powerpc/net/bpf_jit_64.S b/arch/powerpc/net/bpf_jit_64.S
index ff4506e..e590aa5 100644
--- a/arch/powerpc/net/bpf_jit_64.S
+++ b/arch/powerpc/net/bpf_jit_64.S
@@ -31,14 +31,13 @@
* then branch directly to slow_path_XXX if required. (In fact, could
* load a spare GPR with the address of slow_path_generic and pass size
* as an argument, making the call site a mtlr, li and bllr.)
- *
- * Technically, the "is addr < 0" check is unnecessary & slowing down
- * the ABS path, as it's statically checked on generation.
*/
.globl sk_load_word
sk_load_word:
cmpdi r_addr, 0
- blt bpf_error
+ blt bpf_slow_path_word_neg
+ .globl sk_load_word_positive_offset
+sk_load_word_positive_offset:
/* Are we accessing past headlen? */
subi r_scratch1, r_HL, 4
cmpd r_scratch1, r_addr
@@ -51,7 +50,9 @@ sk_load_word:
.globl sk_load_half
sk_load_half:
cmpdi r_addr, 0
- blt bpf_error
+ blt bpf_slow_path_half_neg
+ .globl sk_load_half_positive_offset
+sk_load_half_positive_offset:
subi r_scratch1, r_HL, 2
cmpd r_scratch1, r_addr
blt bpf_slow_path_half
@@ -61,7 +62,9 @@ sk_load_half:
.globl sk_load_byte
sk_load_byte:
cmpdi r_addr, 0
- blt bpf_error
+ blt bpf_slow_path_byte_neg
+ .globl sk_load_byte_positive_offset
+sk_load_byte_positive_offset:
cmpd r_HL, r_addr
ble bpf_slow_path_byte
lbzx r_A, r_D, r_addr
@@ -69,22 +72,20 @@ sk_load_byte:
/*
* BPF_S_LDX_B_MSH: ldxb 4*([offset]&0xf)
- * r_addr is the offset value, already known positive
+ * r_addr is the offset value
*/
.globl sk_load_byte_msh
sk_load_byte_msh:
+ cmpdi r_addr, 0
+ blt bpf_slow_path_byte_msh_neg
+ .globl sk_load_byte_msh_positive_offset
+sk_load_byte_msh_positive_offset:
cmpd r_HL, r_addr
ble bpf_slow_path_byte_msh
lbzx r_X, r_D, r_addr
rlwinm r_X, r_X, 2, 32-4-2, 31-2
blr
-bpf_error:
- /* Entered with cr0 = lt */
- li r3, 0
- /* Generated code will 'blt epilogue', returning 0. */
- blr
-
/* Call out to skb_copy_bits:
* We'll need to back up our volatile regs first; we have
* local variable space at r1+(BPF_PPC_STACK_BASIC).
@@ -136,3 +137,85 @@ bpf_slow_path_byte_msh:
lbz r_X, BPF_PPC_STACK_BASIC+(2*8)(r1)
rlwinm r_X, r_X, 2, 32-4-2, 31-2
blr
+
+/* Call out to bpf_internal_load_pointer_neg_helper:
+ * We'll need to back up our volatile regs first; we have
+ * local variable space at r1+(BPF_PPC_STACK_BASIC).
+ * Allocate a new stack frame here to remain ABI-compliant in
+ * stashing LR.
+ */
+#define sk_negative_common(SIZE) \
+ mflr r0; \
+ std r0, 16(r1); \
+ /* R3 goes in parameter space of caller's frame */ \
+ std r_skb, (BPF_PPC_STACKFRAME+48)(r1); \
+ std r_A, (BPF_PPC_STACK_BASIC+(0*8))(r1); \
+ std r_X, (BPF_PPC_STACK_BASIC+(1*8))(r1); \
+ stdu r1, -BPF_PPC_SLOWPATH_FRAME(r1); \
+ /* R3 = r_skb, as passed */ \
+ mr r4, r_addr; \
+ li r5, SIZE; \
+ bl bpf_internal_load_pointer_neg_helper; \
+ /* R3 != 0 on success */ \
+ addi r1, r1, BPF_PPC_SLOWPATH_FRAME; \
+ ld r0, 16(r1); \
+ ld r_A, (BPF_PPC_STACK_BASIC+(0*8))(r1); \
+ ld r_X, (BPF_PPC_STACK_BASIC+(1*8))(r1); \
+ mtlr r0; \
+ cmpldi r3, 0; \
+ beq bpf_error_slow; /* cr0 = EQ */ \
+ mr r_addr, r3; \
+ ld r_skb, (BPF_PPC_STACKFRAME+48)(r1); \
+ /* Great success! */
+
+bpf_slow_path_word_neg:
+ lis r_scratch1,-32 /* SKF_LL_OFF */
+ cmpd r_addr, r_scratch1 /* addr < SKF_* */
+ blt bpf_error /* cr0 = LT */
+ .globl sk_load_word_negative_offset
+sk_load_word_negative_offset:
+ sk_negative_common(4)
+ lwz r_A, 0(r_addr)
+ blr
+
+bpf_slow_path_half_neg:
+ lis r_scratch1,-32 /* SKF_LL_OFF */
+ cmpd r_addr, r_scratch1 /* addr < SKF_* */
+ blt bpf_error /* cr0 = LT */
+ .globl sk_load_half_negative_offset
+sk_load_half_negative_offset:
+ sk_negative_common(2)
+ lhz r_A, 0(r_addr)
+ blr
+
+bpf_slow_path_byte_neg:
+ lis r_scratch1,-32 /* SKF_LL_OFF */
+ cmpd r_addr, r_scratch1 /* addr < SKF_* */
+ blt bpf_error /* cr0 = LT */
+ .globl sk_load_byte_negative_offset
+sk_load_byte_negative_offset:
+ sk_negative_common(1)
+ lbz r_A, 0(r_addr)
+ blr
+
+bpf_slow_path_byte_msh_neg:
+ lis r_scratch1,-32 /* SKF_LL_OFF */
+ cmpd r_addr, r_scratch1 /* addr < SKF_* */
+ blt bpf_error /* cr0 = LT */
+ .globl sk_load_byte_msh_negative_offset
+sk_load_byte_msh_negative_offset:
+ sk_negative_common(1)
+ lbz r_X, 0(r_addr)
+ rlwinm r_X, r_X, 2, 32-4-2, 31-2
+ blr
+
+bpf_error_slow:
+ /* fabricate a cr0 = lt */
+ li r_scratch1, -1
+ cmpdi r_scratch1, 0
+bpf_error:
+ /* Entered with cr0 = lt */
+ li r3, 0
+ /* Generated code will 'blt epilogue', returning 0. */
+ blr
+
diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
index 73619d3..2dc8b14 100644
--- a/arch/powerpc/net/bpf_jit_comp.c
+++ b/arch/powerpc/net/bpf_jit_comp.c
@@ -127,6 +127,9 @@ static void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx)
PPC_BLR();
}
+#define CHOOSE_LOAD_FUNC(K, func) \
+ ((int)K < 0 ? ((int)K >= SKF_LL_OFF ? func##_negative_offset : func) : func##_positive_offset)
+
/* Assemble the body code between the prologue & epilogue. */
static int bpf_jit_build_body(struct sk_filter *fp, u32 *image,
struct codegen_context *ctx,
@@ -391,21 +394,16 @@ static int bpf_jit_build_body(struct sk_filter *fp, u32 *image,
/*** Absolute loads from packet header/data ***/
case BPF_S_LD_W_ABS:
- func = sk_load_word;
+ func = CHOOSE_LOAD_FUNC(K, sk_load_word);
goto common_load;
case BPF_S_LD_H_ABS:
- func = sk_load_half;
+ func = CHOOSE_LOAD_FUNC(K, sk_load_half);
goto common_load;
case BPF_S_LD_B_ABS:
- func = sk_load_byte;
+ func = CHOOSE_LOAD_FUNC(K, sk_load_byte);
common_load:
- /*
- * Load from [K]. Reference with the (negative)
- * SKF_NET_OFF/SKF_LL_OFF offsets is unsupported.
- */
+ /* Load from [K]. */
ctx->seen |= SEEN_DATAREF;
- if ((int)K < 0)
- return -ENOTSUPP;
PPC_LI64(r_scratch1, func);
PPC_MTLR(r_scratch1);
PPC_LI32(r_addr, K);
@@ -429,7 +427,7 @@ static int bpf_jit_build_body(struct sk_filter *fp, u32 *image,
common_load_ind:
/*
* Load from [X + K]. Negative offsets are tested for
- * in the helper functions, and result in a 'ret 0'.
+ * in the helper functions.
*/
ctx->seen |= SEEN_DATAREF | SEEN_XREG;
PPC_LI64(r_scratch1, func);
@@ -443,13 +441,7 @@ static int bpf_jit_build_body(struct sk_filter *fp, u32 *image,
break;
case BPF_S_LDX_B_MSH:
- /*
- * x86 version drops packet (RET 0) when K<0, whereas
- * interpreter does allow K<0 (__load_pointer, special
- * ancillary data). common_load returns ENOTSUPP if K<0,
- * so we fall back to interpreter & filter works.
- */
- func = sk_load_byte_msh;
+ func = CHOOSE_LOAD_FUNC(K, sk_load_byte_msh);
goto common_load;
break;
diff --git a/arch/x86/net/bpf_jit.S b/arch/x86/net/bpf_jit.S
index 6687022..63ae130 100644
--- a/arch/x86/net/bpf_jit.S
+++ b/arch/x86/net/bpf_jit.S
@@ -18,17 +18,18 @@
* r9d : hlen = skb->len - skb->data_len
*/
#define SKBDATA %r8
+#define SKF_MAX_NEG_OFF $(-0x200000) /* SKF_LL_OFF from filter.h */
-sk_load_word_ind:
- .globl sk_load_word_ind
-
- add %ebx,%esi /* offset += X */
-# test %esi,%esi /* if (offset < 0) goto bpf_error; */
- js bpf_error
-
+ .p2align 1
sk_load_word:
.globl sk_load_word
+ test %esi,%esi
+ js bpf_slow_path_word_neg
+
+sk_load_word_positive_offset:
+ .globl sk_load_word_positive_offset
+
mov %r9d,%eax # hlen
sub %esi,%eax # hlen - offset
cmp $3,%eax
@@ -37,16 +38,16 @@ sk_load_word:
bswap %eax /* ntohl() */
ret
-
-sk_load_half_ind:
- .globl sk_load_half_ind
-
- add %ebx,%esi /* offset += X */
- js bpf_error
-
+ .p2align 1
sk_load_half:
.globl sk_load_half
+ test %esi,%esi
+ js bpf_slow_path_half_neg
+
+sk_load_half_positive_offset:
+ .globl sk_load_half_positive_offset
+
mov %r9d,%eax
sub %esi,%eax # hlen - offset
cmp $1,%eax
@@ -55,14 +56,16 @@ sk_load_half:
rol $8,%ax # ntohs()
ret
-sk_load_byte_ind:
- .globl sk_load_byte_ind
- add %ebx,%esi /* offset += X */
- js bpf_error
-
+ .p2align 1
sk_load_byte:
.globl sk_load_byte
+ test %esi,%esi
+ js bpf_slow_path_byte_neg
+
+sk_load_byte_positive_offset:
+ .globl sk_load_byte_positive_offset
+
cmp %esi,%r9d /* if (offset >= hlen) goto bpf_slow_path_byte */
jle bpf_slow_path_byte
movzbl (SKBDATA,%rsi),%eax
@@ -73,25 +76,22 @@ sk_load_byte:
*
* Implements BPF_S_LDX_B_MSH : ldxb 4*([offset]&0xf)
* Must preserve A accumulator (%eax)
- * Inputs : %esi is the offset value, already known positive
+ * Inputs : %esi is the offset value
*/
-ENTRY(sk_load_byte_msh)
- CFI_STARTPROC
+ .p2align 1
+sk_load_byte_msh:
+ .globl sk_load_byte_msh
+ test %esi,%esi
+ js bpf_slow_path_byte_msh_neg
+
+sk_load_byte_msh_positive_offset:
+ .globl sk_load_byte_msh_positive_offset
cmp %esi,%r9d /* if (offset >= hlen) goto bpf_slow_path_byte_msh */
jle bpf_slow_path_byte_msh
movzbl (SKBDATA,%rsi),%ebx
and $15,%bl
shl $2,%bl
ret
- CFI_ENDPROC
-ENDPROC(sk_load_byte_msh)
-
-bpf_error:
-# force a return 0 from jit handler
- xor %eax,%eax
- mov -8(%rbp),%rbx
- leaveq
- ret
/* rsi contains offset and can be scratched */
#define bpf_slow_path_common(LEN) \
@@ -108,6 +108,7 @@ bpf_error:
pop %rdi
+ .p2align 1
bpf_slow_path_word:
bpf_slow_path_common(4)
js bpf_error
@@ -115,6 +116,7 @@ bpf_slow_path_word:
bswap %eax
ret
+ .p2align 1
bpf_slow_path_half:
bpf_slow_path_common(2)
js bpf_error
@@ -123,12 +125,14 @@ bpf_slow_path_half:
movzwl %ax,%eax
ret
+ .p2align 1
bpf_slow_path_byte:
bpf_slow_path_common(1)
js bpf_error
movzbl -12(%rbp),%eax
ret
+ .p2align 1
bpf_slow_path_byte_msh:
xchg %eax,%ebx /* dont lose A , X is about to be scratched */
bpf_slow_path_common(1)
@@ -138,3 +142,73 @@ bpf_slow_path_byte_msh:
shl $2,%al
xchg %eax,%ebx
ret
+
+#define sk_negative_common(SIZE) \
+ push %rdi; /* save skb */ \
+ push %r9; \
+ push SKBDATA; \
+/* rsi already has offset */ \
+ mov $SIZE,%ecx; /* size */ \
+ call bpf_internal_load_pointer_neg_helper; \
+ test %rax,%rax; \
+ pop SKBDATA; \
+ pop %r9; \
+ pop %rdi; \
+ jz bpf_error
+
+
+ .p2align 1
+bpf_slow_path_word_neg:
+ cmp SKF_MAX_NEG_OFF, %esi /* test range */
+ jl bpf_error /* offset lower -> error */
+sk_load_word_negative_offset:
+ .globl sk_load_word_negative_offset
+ sk_negative_common(4)
+ mov (%rax), %eax
+ bswap %eax
+ ret
+
+ .p2align 1
+bpf_slow_path_half_neg:
+ cmp SKF_MAX_NEG_OFF, %esi
+ jl bpf_error
+sk_load_half_negative_offset:
+ .globl sk_load_half_negative_offset
+ sk_negative_common(2)
+ mov (%rax),%ax
+ rol $8,%ax
+ movzwl %ax,%eax
+ ret
+
+ .p2align 1
+bpf_slow_path_byte_neg:
+ cmp SKF_MAX_NEG_OFF, %esi
+ jl bpf_error
+sk_load_byte_negative_offset:
+ .globl sk_load_byte_negative_offset
+ sk_negative_common(1)
+ movzbl (%rax), %eax
+ ret
+
+ .p2align 1
+bpf_slow_path_byte_msh_neg:
+ cmp SKF_MAX_NEG_OFF, %esi
+ jl bpf_error
+sk_load_byte_msh_negative_offset:
+ .globl sk_load_byte_msh_negative_offset
+ xchg %eax,%ebx /* dont lose A , X is about to be scratched */
+ sk_negative_common(1)
+ movzbl (%rax),%eax
+ and $15,%al
+ shl $2,%al
+ xchg %eax,%ebx
+ ret
+
+ .p2align 1
+bpf_error:
+# force a return 0 from jit handler
+ xor %eax,%eax
+ mov -8(%rbp),%rbx
+ leaveq
+ ret
+
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 5671752..c20374f 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -30,7 +30,10 @@ int bpf_jit_enable __read_mostly;
* assembly code in arch/x86/net/bpf_jit.S
*/
extern u8 sk_load_word[], sk_load_half[], sk_load_byte[], sk_load_byte_msh[];
-extern u8 sk_load_word_ind[], sk_load_half_ind[], sk_load_byte_ind[];
+extern u8 sk_load_word_positive_offset[], sk_load_half_positive_offset[];
+extern u8 sk_load_byte_positive_offset[], sk_load_byte_msh_positive_offset[];
+extern u8 sk_load_word_negative_offset[], sk_load_half_negative_offset[];
+extern u8 sk_load_byte_negative_offset[], sk_load_byte_msh_negative_offset[];
static inline u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len)
{
@@ -117,6 +120,8 @@ static inline void bpf_flush_icache(void *start, void *end)
set_fs(old_fs);
}
+#define CHOOSE_LOAD_FUNC(K, func) \
+ ((int)K < 0 ? ((int)K >= SKF_LL_OFF ? func##_negative_offset : func) : func##_positive_offset)
void bpf_jit_compile(struct sk_filter *fp)
{
@@ -473,44 +478,42 @@ void bpf_jit_compile(struct sk_filter *fp)
#endif
break;
case BPF_S_LD_W_ABS:
- func = sk_load_word;
+ func = CHOOSE_LOAD_FUNC(K, sk_load_word);
common_load: seen |= SEEN_DATAREF;
- if ((int)K < 0) {
- /* Abort the JIT because __load_pointer() is needed. */
- goto out;
- }
t_offset = func - (image + addrs[i]);
EMIT1_off32(0xbe, K); /* mov imm32,%esi */
EMIT1_off32(0xe8, t_offset); /* call */
break;
case BPF_S_LD_H_ABS:
- func = sk_load_half;
+ func = CHOOSE_LOAD_FUNC(K, sk_load_half);
goto common_load;
case BPF_S_LD_B_ABS:
- func = sk_load_byte;
+ func = CHOOSE_LOAD_FUNC(K, sk_load_byte);
goto common_load;
case BPF_S_LDX_B_MSH:
- if ((int)K < 0) {
- /* Abort the JIT because __load_pointer() is needed. */
- goto out;
- }
+ func = CHOOSE_LOAD_FUNC(K, sk_load_byte_msh);
seen |= SEEN_DATAREF | SEEN_XREG;
- t_offset = sk_load_byte_msh - (image + addrs[i]);
+ t_offset = func - (image + addrs[i]);
EMIT1_off32(0xbe, K); /* mov imm32,%esi */
EMIT1_off32(0xe8, t_offset); /* call sk_load_byte_msh */
break;
case BPF_S_LD_W_IND:
- func = sk_load_word_ind;
+ func = sk_load_word;
common_load_ind: seen |= SEEN_DATAREF | SEEN_XREG;
t_offset = func - (image + addrs[i]);
- EMIT1_off32(0xbe, K); /* mov imm32,%esi */
+ if (K) {
+ EMIT2(0x8d, 0xb3); /* lea imm32(%rbx),%esi */
+ EMIT(K, 4);
+ } else {
+ EMIT2(0x89,0xde); /* mov %ebx,%esi */
+ }
EMIT1_off32(0xe8, t_offset); /* call sk_load_xxx_ind */
break;
case BPF_S_LD_H_IND:
- func = sk_load_half_ind;
+ func = sk_load_half;
goto common_load_ind;
case BPF_S_LD_B_IND:
- func = sk_load_byte_ind;
+ func = sk_load_byte;
goto common_load_ind;
case BPF_S_JMP_JA:
t_offset = addrs[i + K] - addrs[i];
diff --git a/net/core/filter.c b/net/core/filter.c
index 5dea452..04ca613 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -41,7 +41,7 @@
#include <linux/ratelimit.h>
/* No hurry in this branch */
-static void *__load_pointer(const struct sk_buff *skb, int k, unsigned int size)
+void *bpf_internal_load_pointer_neg_helper(const struct sk_buff *skb, int k, unsigned int size)
{
u8 *ptr = NULL;
@@ -54,13 +54,14 @@ static void *__load_pointer(const struct sk_buff *skb, int k, unsigned int size)
return ptr;
return NULL;
}
+EXPORT_SYMBOL(bpf_internal_load_pointer_neg_helper);
static inline void *load_pointer(const struct sk_buff *skb, int k,
unsigned int size, void *buffer)
{
if (k >= 0)
return skb_header_pointer(skb, k, size, buffer);
- return __load_pointer(skb, k, size);
+ return bpf_internal_load_pointer_neg_helper(skb, k, size);
}
/**
^ permalink raw reply related [flat|nested] 14+ messages in thread
* Re: [REGRESSION][PATCH] bpf_jit drops the ball on indirect negative mem references
2012-03-29 11:54 ` Jan Seiffert
@ 2012-03-29 13:57 ` Eric Dumazet
2012-03-30 9:11 ` Eric Dumazet
1 sibling, 0 replies; 14+ messages in thread
From: Eric Dumazet @ 2012-03-29 13:57 UTC (permalink / raw)
To: Jan Seiffert; +Cc: netdev, linux-kernel, David S. Miller, Matt Evans
Le jeudi 29 mars 2012 à 13:54 +0200, Jan Seiffert a écrit :
> Ok, to keep the ball rolling here is a V2 with the changes you suggested:
>
...
> I propose the following patch to fix this situation.
> Lightly tested on x86, but the powerpc asm part is probably wrong.
>
>
> Signed-off-by: Jan Seiffert <kaffeemonster@googlemail.com>
>
x86 part seems very good at first glance, I'll test it and give my
feedback soon.
Thanks !
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [REGRESSION][PATCH] bpf_jit drops the ball on indirect negative mem references
2012-03-29 11:54 ` Jan Seiffert
2012-03-29 13:57 ` Eric Dumazet
@ 2012-03-30 9:11 ` Eric Dumazet
2012-03-30 13:42 ` Jan Seiffert
1 sibling, 1 reply; 14+ messages in thread
From: Eric Dumazet @ 2012-03-30 9:11 UTC (permalink / raw)
To: Jan Seiffert; +Cc: netdev, linux-kernel, David S. Miller, Matt Evans
Le jeudi 29 mars 2012 à 13:54 +0200, Jan Seiffert a écrit :
>
> + .p2align 1
> bpf_slow_path_word:
> bpf_slow_path_common(4)
> js bpf_error
> @@ -115,6 +116,7 @@ bpf_slow_path_word:
> bswap %eax
> ret
>
> + .p2align 1
> bpf_slow_path_half:
> bpf_slow_path_common(2)
> js bpf_error
> @@ -123,12 +125,14 @@ bpf_slow_path_half:
> movzwl %ax,%eax
> ret
All these ".p2align 1" are noise for this patch.
This should be done as separate patch, explaining the rationale.
...
> case BPF_S_LD_W_IND:
> - func = sk_load_word_ind;
> + func = sk_load_word;
> common_load_ind: seen |= SEEN_DATAREF | SEEN_XREG;
> t_offset = func - (image + addrs[i]);
> - EMIT1_off32(0xbe, K); /* mov imm32,%esi */
> + if (K) {
> + EMIT2(0x8d, 0xb3); /* lea imm32(%rbx),%esi */
> + EMIT(K, 4);
> + } else {
> + EMIT2(0x89,0xde); /* mov %ebx,%esi */
> + }
> EMIT1_off32(0xe8, t_offset); /* call sk_load_xxx_ind */
> break;
Please add the code for imm8 offsets as well ?
if (is_imm8(K))
EMIT3(0x8d, 0x73, K); /* lea imm8(%rbx),%esi */
else
EMIT2_off32(0x8d, 0xb3, K); /* lea imm32(%rbx),%esi */
Thanks
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [REGRESSION][PATCH] bpf_jit drops the ball on indirect negative mem references
2012-03-30 9:11 ` Eric Dumazet
@ 2012-03-30 13:42 ` Jan Seiffert
2012-03-30 14:08 ` Eric Dumazet
2012-03-30 15:19 ` Matt Evans
0 siblings, 2 replies; 14+ messages in thread
From: Jan Seiffert @ 2012-03-30 13:42 UTC (permalink / raw)
To: netdev; +Cc: Eric Dumazet, linux-kernel, David S. Miller, Matt Evans
Eric Dumazet schrieb:
> [snip]
> All these ".p2align 1" are noise for this patch.
>
> This should be done as separate patch, explaining the rationale.
>
> ...
Ok, I thought since you were concerned with the performance and
I'm touching this stuff anyway.
But you are right, can be done separately.
So gone.
> [snip]
>
> Please add the code for imm8 offsets as well ?
>
> if (is_imm8(K))
> EMIT3(0x8d, 0x73, K); /* lea imm8(%rbx),%esi */
> else
> EMIT2_off32(0x8d, 0xb3, K); /* lea imm32(%rbx),%esi */
Right, there is this imm8 form. I left it out because I never saw gas emit it
and totally forgot about it.
Your wish is my command. But since there is no EMIT2_off32 and introducing
it would mean additional cleanup noise, I stayed with two emits.
Do you know where i can ping the powerpc guys a little bit harder?
So here a v3 of the patch:
Consider the following test program:
#include <stdio.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <pcap-bpf.h>
#define die(x) do {perror(x); return 1;} while (0)
struct bpf_insn udp_filter[] = {
/* 0 */ BPF_STMT(BPF_LDX|BPF_W|BPF_IMM, -1048576+(0)), /* leax net[0] */
/* 1 */ BPF_STMT(BPF_LD|BPF_B|BPF_IND, 0), /* ldb [x+0] */
/* 2 */ BPF_STMT(BPF_RET|BPF_A, 0), /* ret a */
};
int main(int argc, char *argv[])
{
char buf[512];
struct sockaddr_in addr;
struct bpf_program prg;
socklen_t addr_s;
ssize_t res;
int fd;
addr.sin_family = AF_INET;
addr.sin_port = htons(5000);
addr.sin_addr.s_addr = 0;
addr_s = sizeof(addr);
prg.bf_len = sizeof(udp_filter)/sizeof(udp_filter[0]);
prg.bf_insns = udp_filter;
if(-1 == (fd = socket(AF_INET, SOCK_DGRAM, 0)))
die("socket");
if(-1 == bind(fd, (struct sockaddr *)&addr, sizeof(addr)))
die("bind");
if(-1 == setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &prg, sizeof(prg)))
die("setsockopt");
res = recvfrom(fd, buf, sizeof(buf), 0, (struct sockaddr *)&addr, &addr_s);
if(res != -1)
printf("packet received: %zi bytes\n", res);
else
die("recvfrom");
return 0;
}
when used with the bpf jit disabled works:
console 1 $ ./bpf
console 2 $ echo "hello" | nc -u localhost 5000
console 1: packet received: 6 bytes
When the bpf jit gets enabled (echo 100 >
/proc/sys/net/core/bpf_jit_enable) the same program stops working:
console 1 $ ./bpf
console 2 $ echo "hello" | nc -u localhost 5000
console 1:
The reason is that both jits (x86 and powerpc) do not handle negative
memory references like SKF_NET_OFF or SKF_LL_OFF, only the simple
ancillary data references are supported (by mapping to special
instructions).
In the case of an absolute reference, the jit aborts the translation
if a negative reference is seen, also a negative k on the indirect
load aborts the translation, but if X is negative to begin with, only
the error handler is reached at runtime which drops the whole packet.
I propose the following patch to fix this situation.
Lightly tested on x86, but the powerpc asm part is probably wrong.
Signed-off-by: Jan Seiffert <kaffeemonster@googlemail.com>
diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h
index af1ab5e..e9b57b3 100644
--- a/arch/powerpc/net/bpf_jit.h
+++ b/arch/powerpc/net/bpf_jit.h
@@ -49,6 +49,10 @@
* Assembly helpers from arch/powerpc/net/bpf_jit.S:
*/
extern u8 sk_load_word[], sk_load_half[], sk_load_byte[], sk_load_byte_msh[];
+extern u8 sk_load_word_positive_offset[], sk_load_half_positive_offset[];
+extern u8 sk_load_byte_positive_offset[], sk_load_byte_msh_positive_offset[];
+extern u8 sk_load_word_negative_offset[], sk_load_half_negative_offset[];
+extern u8 sk_load_byte_negative_offset[], sk_load_byte_msh_negative_offset[];
#define FUNCTION_DESCR_SIZE 24
diff --git a/arch/powerpc/net/bpf_jit_64.S b/arch/powerpc/net/bpf_jit_64.S
index ff4506e..e590aa5 100644
--- a/arch/powerpc/net/bpf_jit_64.S
+++ b/arch/powerpc/net/bpf_jit_64.S
@@ -31,14 +31,13 @@
* then branch directly to slow_path_XXX if required. (In fact, could
* load a spare GPR with the address of slow_path_generic and pass size
* as an argument, making the call site a mtlr, li and bllr.)
- *
- * Technically, the "is addr < 0" check is unnecessary & slowing down
- * the ABS path, as it's statically checked on generation.
*/
.globl sk_load_word
sk_load_word:
cmpdi r_addr, 0
- blt bpf_error
+ blt bpf_slow_path_word_neg
+ .globl sk_load_word_positive_offset
+sk_load_word_positive_offset:
/* Are we accessing past headlen? */
subi r_scratch1, r_HL, 4
cmpd r_scratch1, r_addr
@@ -51,7 +50,9 @@ sk_load_word:
.globl sk_load_half
sk_load_half:
cmpdi r_addr, 0
- blt bpf_error
+ blt bpf_slow_path_half_neg
+ .globl sk_load_half_positive_offset
+sk_load_half_positive_offset:
subi r_scratch1, r_HL, 2
cmpd r_scratch1, r_addr
blt bpf_slow_path_half
@@ -61,7 +62,9 @@ sk_load_half:
.globl sk_load_byte
sk_load_byte:
cmpdi r_addr, 0
- blt bpf_error
+ blt bpf_slow_path_byte_neg
+ .globl sk_load_byte_positive_offset
+sk_load_byte_positive_offset:
cmpd r_HL, r_addr
ble bpf_slow_path_byte
lbzx r_A, r_D, r_addr
@@ -69,22 +72,20 @@ sk_load_byte:
/*
* BPF_S_LDX_B_MSH: ldxb 4*([offset]&0xf)
- * r_addr is the offset value, already known positive
+ * r_addr is the offset value
*/
.globl sk_load_byte_msh
sk_load_byte_msh:
+ cmpdi r_addr, 0
+ blt bpf_slow_path_byte_msh_neg
+ .globl sk_load_byte_msh_positive_offset
+sk_load_byte_msh_positive_offset:
cmpd r_HL, r_addr
ble bpf_slow_path_byte_msh
lbzx r_X, r_D, r_addr
rlwinm r_X, r_X, 2, 32-4-2, 31-2
blr
-bpf_error:
- /* Entered with cr0 = lt */
- li r3, 0
- /* Generated code will 'blt epilogue', returning 0. */
- blr
-
/* Call out to skb_copy_bits:
* We'll need to back up our volatile regs first; we have
* local variable space at r1+(BPF_PPC_STACK_BASIC).
@@ -136,3 +137,85 @@ bpf_slow_path_byte_msh:
lbz r_X, BPF_PPC_STACK_BASIC+(2*8)(r1)
rlwinm r_X, r_X, 2, 32-4-2, 31-2
blr
+
+/* Call out to bpf_internal_load_pointer_neg_helper:
+ * We'll need to back up our volatile regs first; we have
+ * local variable space at r1+(BPF_PPC_STACK_BASIC).
+ * Allocate a new stack frame here to remain ABI-compliant in
+ * stashing LR.
+ */
+#define sk_negative_common(SIZE) \
+ mflr r0; \
+ std r0, 16(r1); \
+ /* R3 goes in parameter space of caller's frame */ \
+ std r_skb, (BPF_PPC_STACKFRAME+48)(r1); \
+ std r_A, (BPF_PPC_STACK_BASIC+(0*8))(r1); \
+ std r_X, (BPF_PPC_STACK_BASIC+(1*8))(r1); \
+ stdu r1, -BPF_PPC_SLOWPATH_FRAME(r1); \
+ /* R3 = r_skb, as passed */ \
+ mr r4, r_addr; \
+ li r5, SIZE; \
+ bl bpf_internal_load_pointer_neg_helper; \
+ /* R3 != 0 on success */ \
+ addi r1, r1, BPF_PPC_SLOWPATH_FRAME; \
+ ld r0, 16(r1); \
+ ld r_A, (BPF_PPC_STACK_BASIC+(0*8))(r1); \
+ ld r_X, (BPF_PPC_STACK_BASIC+(1*8))(r1); \
+ mtlr r0; \
+ cmpldi r3, 0; \
+ beq bpf_error_slow; /* cr0 = EQ */ \
+ mr r_addr, r3; \
+ ld r_skb, (BPF_PPC_STACKFRAME+48)(r1); \
+ /* Great success! */
+
+bpf_slow_path_word_neg:
+ lis r_scratch1,-32 /* SKF_LL_OFF */
+ cmpd r_addr, r_scratch1 /* addr < SKF_* */
+ blt bpf_error /* cr0 = LT */
+ .globl sk_load_word_negative_offset
+sk_load_word_negative_offset:
+ sk_negative_common(4)
+ lwz r_A, 0(r_addr)
+ blr
+
+bpf_slow_path_half_neg:
+ lis r_scratch1,-32 /* SKF_LL_OFF */
+ cmpd r_addr, r_scratch1 /* addr < SKF_* */
+ blt bpf_error /* cr0 = LT */
+ .globl sk_load_half_negative_offset
+sk_load_half_negative_offset:
+ sk_negative_common(2)
+ lhz r_A, 0(r_addr)
+ blr
+
+bpf_slow_path_byte_neg:
+ lis r_scratch1,-32 /* SKF_LL_OFF */
+ cmpd r_addr, r_scratch1 /* addr < SKF_* */
+ blt bpf_error /* cr0 = LT */
+ .globl sk_load_byte_negative_offset
+sk_load_byte_negative_offset:
+ sk_negative_common(1)
+ lbz r_A, 0(r_addr)
+ blr
+
+bpf_slow_path_byte_msh_neg:
+ lis r_scratch1,-32 /* SKF_LL_OFF */
+ cmpd r_addr, r_scratch1 /* addr < SKF_* */
+ blt bpf_error /* cr0 = LT */
+ .globl sk_load_byte_msh_negative_offset
+sk_load_byte_msh_negative_offset:
+ sk_negative_common(1)
+ lbz r_X, 0(r_addr)
+ rlwinm r_X, r_X, 2, 32-4-2, 31-2
+ blr
+
+bpf_error_slow:
+ /* fabricate a cr0 = lt */
+ li r_scratch1, -1
+ cmpdi r_scratch1, 0
+bpf_error:
+ /* Entered with cr0 = lt */
+ li r3, 0
+ /* Generated code will 'blt epilogue', returning 0. */
+ blr
+
diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
index 73619d3..2dc8b14 100644
--- a/arch/powerpc/net/bpf_jit_comp.c
+++ b/arch/powerpc/net/bpf_jit_comp.c
@@ -127,6 +127,9 @@ static void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx)
PPC_BLR();
}
+#define CHOOSE_LOAD_FUNC(K, func) \
+ ((int)K < 0 ? ((int)K >= SKF_LL_OFF ? func##_negative_offset : func) : func##_positive_offset)
+
/* Assemble the body code between the prologue & epilogue. */
static int bpf_jit_build_body(struct sk_filter *fp, u32 *image,
struct codegen_context *ctx,
@@ -391,21 +394,16 @@ static int bpf_jit_build_body(struct sk_filter *fp, u32 *image,
/*** Absolute loads from packet header/data ***/
case BPF_S_LD_W_ABS:
- func = sk_load_word;
+ func = CHOOSE_LOAD_FUNC(K, sk_load_word);
goto common_load;
case BPF_S_LD_H_ABS:
- func = sk_load_half;
+ func = CHOOSE_LOAD_FUNC(K, sk_load_half);
goto common_load;
case BPF_S_LD_B_ABS:
- func = sk_load_byte;
+ func = CHOOSE_LOAD_FUNC(K, sk_load_byte);
common_load:
- /*
- * Load from [K]. Reference with the (negative)
- * SKF_NET_OFF/SKF_LL_OFF offsets is unsupported.
- */
+ /* Load from [K]. */
ctx->seen |= SEEN_DATAREF;
- if ((int)K < 0)
- return -ENOTSUPP;
PPC_LI64(r_scratch1, func);
PPC_MTLR(r_scratch1);
PPC_LI32(r_addr, K);
@@ -429,7 +427,7 @@ static int bpf_jit_build_body(struct sk_filter *fp, u32 *image,
common_load_ind:
/*
* Load from [X + K]. Negative offsets are tested for
- * in the helper functions, and result in a 'ret 0'.
+ * in the helper functions.
*/
ctx->seen |= SEEN_DATAREF | SEEN_XREG;
PPC_LI64(r_scratch1, func);
@@ -443,13 +441,7 @@ static int bpf_jit_build_body(struct sk_filter *fp, u32 *image,
break;
case BPF_S_LDX_B_MSH:
- /*
- * x86 version drops packet (RET 0) when K<0, whereas
- * interpreter does allow K<0 (__load_pointer, special
- * ancillary data). common_load returns ENOTSUPP if K<0,
- * so we fall back to interpreter & filter works.
- */
- func = sk_load_byte_msh;
+ func = CHOOSE_LOAD_FUNC(K, sk_load_byte_msh);
goto common_load;
break;
diff --git a/arch/x86/net/bpf_jit.S b/arch/x86/net/bpf_jit.S
index 6687022..2897d7f 100644
--- a/arch/x86/net/bpf_jit.S
+++ b/arch/x86/net/bpf_jit.S
@@ -18,17 +18,17 @@
* r9d : hlen = skb->len - skb->data_len
*/
#define SKBDATA %r8
-
-sk_load_word_ind:
- .globl sk_load_word_ind
-
- add %ebx,%esi /* offset += X */
-# test %esi,%esi /* if (offset < 0) goto bpf_error; */
- js bpf_error
+#define SKF_MAX_NEG_OFF $(-0x200000) /* SKF_LL_OFF from filter.h */
sk_load_word:
.globl sk_load_word
+ test %esi,%esi
+ js bpf_slow_path_word_neg
+
+sk_load_word_positive_offset:
+ .globl sk_load_word_positive_offset
+
mov %r9d,%eax # hlen
sub %esi,%eax # hlen - offset
cmp $3,%eax
@@ -37,16 +37,15 @@ sk_load_word:
bswap %eax /* ntohl() */
ret
-
-sk_load_half_ind:
- .globl sk_load_half_ind
-
- add %ebx,%esi /* offset += X */
- js bpf_error
-
sk_load_half:
.globl sk_load_half
+ test %esi,%esi
+ js bpf_slow_path_half_neg
+
+sk_load_half_positive_offset:
+ .globl sk_load_half_positive_offset
+
mov %r9d,%eax
sub %esi,%eax # hlen - offset
cmp $1,%eax
@@ -55,14 +54,15 @@ sk_load_half:
rol $8,%ax # ntohs()
ret
-sk_load_byte_ind:
- .globl sk_load_byte_ind
- add %ebx,%esi /* offset += X */
- js bpf_error
-
sk_load_byte:
.globl sk_load_byte
+ test %esi,%esi
+ js bpf_slow_path_byte_neg
+
+sk_load_byte_positive_offset:
+ .globl sk_load_byte_positive_offset
+
cmp %esi,%r9d /* if (offset >= hlen) goto bpf_slow_path_byte */
jle bpf_slow_path_byte
movzbl (SKBDATA,%rsi),%eax
@@ -73,25 +73,21 @@ sk_load_byte:
*
* Implements BPF_S_LDX_B_MSH : ldxb 4*([offset]&0xf)
* Must preserve A accumulator (%eax)
- * Inputs : %esi is the offset value, already known positive
+ * Inputs : %esi is the offset value
*/
-ENTRY(sk_load_byte_msh)
- CFI_STARTPROC
+sk_load_byte_msh:
+ .globl sk_load_byte_msh
+ test %esi,%esi
+ js bpf_slow_path_byte_msh_neg
+
+sk_load_byte_msh_positive_offset:
+ .globl sk_load_byte_msh_positive_offset
cmp %esi,%r9d /* if (offset >= hlen) goto bpf_slow_path_byte_msh */
jle bpf_slow_path_byte_msh
movzbl (SKBDATA,%rsi),%ebx
and $15,%bl
shl $2,%bl
ret
- CFI_ENDPROC
-ENDPROC(sk_load_byte_msh)
-
-bpf_error:
-# force a return 0 from jit handler
- xor %eax,%eax
- mov -8(%rbp),%rbx
- leaveq
- ret
/* rsi contains offset and can be scratched */
#define bpf_slow_path_common(LEN) \
@@ -138,3 +134,68 @@ bpf_slow_path_byte_msh:
shl $2,%al
xchg %eax,%ebx
ret
+
+#define sk_negative_common(SIZE) \
+ push %rdi; /* save skb */ \
+ push %r9; \
+ push SKBDATA; \
+/* rsi already has offset */ \
+ mov $SIZE,%ecx; /* size */ \
+ call bpf_internal_load_pointer_neg_helper; \
+ test %rax,%rax; \
+ pop SKBDATA; \
+ pop %r9; \
+ pop %rdi; \
+ jz bpf_error
+
+
+bpf_slow_path_word_neg:
+ cmp SKF_MAX_NEG_OFF, %esi /* test range */
+ jl bpf_error /* offset lower -> error */
+sk_load_word_negative_offset:
+ .globl sk_load_word_negative_offset
+ sk_negative_common(4)
+ mov (%rax), %eax
+ bswap %eax
+ ret
+
+bpf_slow_path_half_neg:
+ cmp SKF_MAX_NEG_OFF, %esi
+ jl bpf_error
+sk_load_half_negative_offset:
+ .globl sk_load_half_negative_offset
+ sk_negative_common(2)
+ mov (%rax),%ax
+ rol $8,%ax
+ movzwl %ax,%eax
+ ret
+
+bpf_slow_path_byte_neg:
+ cmp SKF_MAX_NEG_OFF, %esi
+ jl bpf_error
+sk_load_byte_negative_offset:
+ .globl sk_load_byte_negative_offset
+ sk_negative_common(1)
+ movzbl (%rax), %eax
+ ret
+
+bpf_slow_path_byte_msh_neg:
+ cmp SKF_MAX_NEG_OFF, %esi
+ jl bpf_error
+sk_load_byte_msh_negative_offset:
+ .globl sk_load_byte_msh_negative_offset
+ xchg %eax,%ebx /* dont lose A , X is about to be scratched */
+ sk_negative_common(1)
+ movzbl (%rax),%eax
+ and $15,%al
+ shl $2,%al
+ xchg %eax,%ebx
+ ret
+
+bpf_error:
+# force a return 0 from jit handler
+ xor %eax,%eax
+ mov -8(%rbp),%rbx
+ leaveq
+ ret
+
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 5671752..39a2e2c 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -30,7 +30,10 @@ int bpf_jit_enable __read_mostly;
* assembly code in arch/x86/net/bpf_jit.S
*/
extern u8 sk_load_word[], sk_load_half[], sk_load_byte[], sk_load_byte_msh[];
-extern u8 sk_load_word_ind[], sk_load_half_ind[], sk_load_byte_ind[];
+extern u8 sk_load_word_positive_offset[], sk_load_half_positive_offset[];
+extern u8 sk_load_byte_positive_offset[], sk_load_byte_msh_positive_offset[];
+extern u8 sk_load_word_negative_offset[], sk_load_half_negative_offset[];
+extern u8 sk_load_byte_negative_offset[], sk_load_byte_msh_negative_offset[];
static inline u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len)
{
@@ -117,6 +120,8 @@ static inline void bpf_flush_icache(void *start, void *end)
set_fs(old_fs);
}
+#define CHOOSE_LOAD_FUNC(K, func) \
+ ((int)K < 0 ? ((int)K >= SKF_LL_OFF ? func##_negative_offset : func) : func##_positive_offset)
void bpf_jit_compile(struct sk_filter *fp)
{
@@ -473,44 +478,46 @@ void bpf_jit_compile(struct sk_filter *fp)
#endif
break;
case BPF_S_LD_W_ABS:
- func = sk_load_word;
+ func = CHOOSE_LOAD_FUNC(K, sk_load_word);
common_load: seen |= SEEN_DATAREF;
- if ((int)K < 0) {
- /* Abort the JIT because __load_pointer() is needed. */
- goto out;
- }
t_offset = func - (image + addrs[i]);
EMIT1_off32(0xbe, K); /* mov imm32,%esi */
EMIT1_off32(0xe8, t_offset); /* call */
break;
case BPF_S_LD_H_ABS:
- func = sk_load_half;
+ func = CHOOSE_LOAD_FUNC(K, sk_load_half);
goto common_load;
case BPF_S_LD_B_ABS:
- func = sk_load_byte;
+ func = CHOOSE_LOAD_FUNC(K, sk_load_byte);
goto common_load;
case BPF_S_LDX_B_MSH:
- if ((int)K < 0) {
- /* Abort the JIT because __load_pointer() is needed. */
- goto out;
- }
+ func = CHOOSE_LOAD_FUNC(K, sk_load_byte_msh);
seen |= SEEN_DATAREF | SEEN_XREG;
- t_offset = sk_load_byte_msh - (image + addrs[i]);
+ t_offset = func - (image + addrs[i]);
EMIT1_off32(0xbe, K); /* mov imm32,%esi */
EMIT1_off32(0xe8, t_offset); /* call sk_load_byte_msh */
break;
case BPF_S_LD_W_IND:
- func = sk_load_word_ind;
+ func = sk_load_word;
common_load_ind: seen |= SEEN_DATAREF | SEEN_XREG;
t_offset = func - (image + addrs[i]);
- EMIT1_off32(0xbe, K); /* mov imm32,%esi */
+ if (K) {
+ if (is_imm8(K)) {
+ EMIT3(0x8d, 0x73, K); /* lea imm8(%rbx), %esi */
+ } else {
+ EMIT2(0x8d, 0xb3); /* lea imm32(%rbx),%esi */
+ EMIT(K, 4);
+ }
+ } else {
+ EMIT2(0x89,0xde); /* mov %ebx,%esi */
+ }
EMIT1_off32(0xe8, t_offset); /* call sk_load_xxx_ind */
break;
case BPF_S_LD_H_IND:
- func = sk_load_half_ind;
+ func = sk_load_half;
goto common_load_ind;
case BPF_S_LD_B_IND:
- func = sk_load_byte_ind;
+ func = sk_load_byte;
goto common_load_ind;
case BPF_S_JMP_JA:
t_offset = addrs[i + K] - addrs[i];
diff --git a/net/core/filter.c b/net/core/filter.c
index 5dea452..04ca613 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -41,7 +41,7 @@
#include <linux/ratelimit.h>
/* No hurry in this branch */
-static void *__load_pointer(const struct sk_buff *skb, int k, unsigned int size)
+void *bpf_internal_load_pointer_neg_helper(const struct sk_buff *skb, int k, unsigned int size)
{
u8 *ptr = NULL;
@@ -54,13 +54,14 @@ static void *__load_pointer(const struct sk_buff *skb, int k, unsigned int size)
return ptr;
return NULL;
}
+EXPORT_SYMBOL(bpf_internal_load_pointer_neg_helper);
static inline void *load_pointer(const struct sk_buff *skb, int k,
unsigned int size, void *buffer)
{
if (k >= 0)
return skb_header_pointer(skb, k, size, buffer);
- return __load_pointer(skb, k, size);
+ return bpf_internal_load_pointer_neg_helper(skb, k, size);
}
/**
^ permalink raw reply related [flat|nested] 14+ messages in thread
* Re: [REGRESSION][PATCH] bpf_jit drops the ball on indirect negative mem references
2012-03-30 13:42 ` Jan Seiffert
@ 2012-03-30 14:08 ` Eric Dumazet
2012-03-30 14:11 ` Josh Boyer
2012-03-30 15:24 ` Matt Evans
2012-03-30 15:19 ` Matt Evans
1 sibling, 2 replies; 14+ messages in thread
From: Eric Dumazet @ 2012-03-30 14:08 UTC (permalink / raw)
To: Jan Seiffert; +Cc: netdev, linux-kernel, David S. Miller, Matt Evans
On Fri, 2012-03-30 at 15:42 +0200, Jan Seiffert wrote:
> Do you know where i can ping the powerpc guys a little bit harder?
maybe using CC linux-arch@vger.kernel.org
One way to handle that would be to split this patch in 3 parts, so that
I can Ack the non ppc part.
[1] introduce bpf_internal_load_pointer_neg_helper()
[BTW no need to EXPORT_SYMBOL it, since jit is not a module ]
[2] x86_bpf_iit: handle negative offsets in loads
[3] ppc_bpf_jit: handle negative offsets in loads
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [REGRESSION][PATCH] bpf_jit drops the ball on indirect negative mem references
2012-03-30 14:08 ` Eric Dumazet
@ 2012-03-30 14:11 ` Josh Boyer
2012-03-30 15:24 ` Matt Evans
1 sibling, 0 replies; 14+ messages in thread
From: Josh Boyer @ 2012-03-30 14:11 UTC (permalink / raw)
To: Eric Dumazet
Cc: Jan Seiffert, netdev, linux-kernel, David S. Miller, Matt Evans
On Fri, Mar 30, 2012 at 10:08 AM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> On Fri, 2012-03-30 at 15:42 +0200, Jan Seiffert wrote:
>
>> Do you know where i can ping the powerpc guys a little bit harder?
>
> maybe using CC linux-arch@vger.kernel.org
Or linuxppc-dev@lists.ozlabs.org
josh
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [REGRESSION][PATCH] bpf_jit drops the ball on indirect negative mem references
2012-03-30 13:42 ` Jan Seiffert
2012-03-30 14:08 ` Eric Dumazet
@ 2012-03-30 15:19 ` Matt Evans
2012-03-30 15:51 ` Jan Seiffert
1 sibling, 1 reply; 14+ messages in thread
From: Matt Evans @ 2012-03-30 15:19 UTC (permalink / raw)
To: Jan Seiffert
Cc: <netdev@vger.kernel.org>,
Eric Dumazet, <linux-kernel@vger.kernel.org>,
David S. Miller
Hi Jan,
On 30 Mar 2012, at 14:42, Jan Seiffert <kaffeemonster@googlemail.com> wrote:
> Eric Dumazet schrieb:
>> [snip]
>> All these ".p2align 1" are noise for this patch.
>>
>> This should be done as separate patch, explaining the rationale.
>>
>> ...
> Ok, I thought since you were concerned with the performance and
> I'm touching this stuff anyway.
> But you are right, can be done separately.
> So gone.
>
>> [snip]
>>
>> Please add the code for imm8 offsets as well ?
>>
>> if (is_imm8(K))
>> EMIT3(0x8d, 0x73, K); /* lea imm8(%rbx),%esi */
>> else
>> EMIT2_off32(0x8d, 0xb3, K); /* lea imm32(%rbx),%esi */
>
> Right, there is this imm8 form. I left it out because i never saw gas emit it
> and totally forgot about it.
> Your wish is my command. But since there is no EMIT2_off32 and introducing
> it would mean additional cleanup noise, i stayed with two emits.
>
> Do you know where i can ping the powerpc guys a little bit harder?
No need (unless you mean a different guy), I have enough guilt as it is! :) (Sorry for no response, am moving house and am netless.) I skimmed your patches on my phone but hope to be in a state to review/test over the weekend or early next week. :)
Cheers,
Matt
>
> So here a v3 of the patch:
>
> Consider the following test program:
>
> #include <stdio.h>
> #include <sys/types.h>
> #include <sys/socket.h>
> #include <netinet/in.h>
> #include <pcap-bpf.h>
>
> #define die(x) do {perror(x); return 1;} while (0)
> struct bpf_insn udp_filter[] = {
> /* 0 */ BPF_STMT(BPF_LDX|BPF_W|BPF_IMM, -1048576+(0)), /* leax net[0] */
> /* 1 */ BPF_STMT(BPF_LD|BPF_B|BPF_IND, 0), /* ldb [x+0] */
> /* 2 */ BPF_STMT(BPF_RET|BPF_A, 0), /* ret a */
> };
>
> int main(int argc, char *argv[])
> {
> char buf[512];
> struct sockaddr_in addr;
> struct bpf_program prg;
> socklen_t addr_s;
> ssize_t res;
> int fd;
>
> addr.sin_family = AF_INET;
> addr.sin_port = htons(5000);
> addr.sin_addr.s_addr = 0;
> addr_s = sizeof(addr);
> prg.bf_len = sizeof(udp_filter)/sizeof(udp_filter[0]);
> prg.bf_insns = udp_filter;
> if(-1 == (fd = socket(AF_INET, SOCK_DGRAM, 0)))
> die("socket");
> if(-1 == bind(fd, (struct sockaddr *)&addr, sizeof(addr)))
> die("bind");
> if(-1 == setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &prg, sizeof(prg)))
> die("setsockopt");
> res = recvfrom(fd, buf, sizeof(buf), 0, (struct sockaddr *)&addr, &addr_s);
> if(res != -1)
> printf("packet received: %zi bytes\n", res);
> else
> die("recvfrom");
> return 0;
> }
>
> when used with the bpf jit disabled works:
> console 1 $ ./bpf
> console 2 $ echo "hello" | nc -u localhost 5000
> console 1: packet received: 6 bytes
>
> When the bpf jit gets enabled (echo 100 >
> /proc/sys/net/core/bpf_jit_enable) the same program stops working:
> console 1 $ ./bpf
> console 2 $ echo "hello" | nc -u localhost 5000
> console 1:
>
> The reason is that both jits (x86 and powerpc) do not handle negative
> memory references like SKF_NET_OFF or SKF_LL_OFF, only the simple
> ancillary data references are supported (by mapping to special
> instructions).
> In the case of an absolute reference, the jit aborts the translation
> if a negative reference is seen, also a negative k on the indirect
> load aborts the translation, but if X is negative to begin with, only
> the error handler is reached at runtime which drops the whole packet.
>
> I propose the following patch to fix this situation.
> Lightly tested on x86, but the powerpc asm part is probably wrong.
>
> Signed-off-by: Jan Seiffert <kaffeemonster@googlemail.com>
>
> diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h
> index af1ab5e..e9b57b3 100644
> --- a/arch/powerpc/net/bpf_jit.h
> +++ b/arch/powerpc/net/bpf_jit.h
> @@ -49,6 +49,10 @@
> * Assembly helpers from arch/powerpc/net/bpf_jit.S:
> */
> extern u8 sk_load_word[], sk_load_half[], sk_load_byte[], sk_load_byte_msh[];
> +extern u8 sk_load_word_positive_offset[], sk_load_half_positive_offset[];
> +extern u8 sk_load_byte_positive_offset[], sk_load_byte_msh_positive_offset[];
> +extern u8 sk_load_word_negative_offset[], sk_load_half_negative_offset[];
> +extern u8 sk_load_byte_negative_offset[], sk_load_byte_msh_negative_offset[];
>
> #define FUNCTION_DESCR_SIZE 24
>
> diff --git a/arch/powerpc/net/bpf_jit_64.S b/arch/powerpc/net/bpf_jit_64.S
> index ff4506e..e590aa5 100644
> --- a/arch/powerpc/net/bpf_jit_64.S
> +++ b/arch/powerpc/net/bpf_jit_64.S
> @@ -31,14 +31,13 @@
> * then branch directly to slow_path_XXX if required. (In fact, could
> * load a spare GPR with the address of slow_path_generic and pass size
> * as an argument, making the call site a mtlr, li and bllr.)
> - *
> - * Technically, the "is addr < 0" check is unnecessary & slowing down
> - * the ABS path, as it's statically checked on generation.
> */
> .globl sk_load_word
> sk_load_word:
> cmpdi r_addr, 0
> - blt bpf_error
> + blt bpf_slow_path_word_neg
> + .globl sk_load_word_positive_offset
> +sk_load_word_positive_offset:
> /* Are we accessing past headlen? */
> subi r_scratch1, r_HL, 4
> cmpd r_scratch1, r_addr
> @@ -51,7 +50,9 @@ sk_load_word:
> .globl sk_load_half
> sk_load_half:
> cmpdi r_addr, 0
> - blt bpf_error
> + blt bpf_slow_path_half_neg
> + .globl sk_load_half_positive_offset
> +sk_load_half_positive_offset:
> subi r_scratch1, r_HL, 2
> cmpd r_scratch1, r_addr
> blt bpf_slow_path_half
> @@ -61,7 +62,9 @@ sk_load_half:
> .globl sk_load_byte
> sk_load_byte:
> cmpdi r_addr, 0
> - blt bpf_error
> + blt bpf_slow_path_byte_neg
> + .globl sk_load_byte_positive_offset
> +sk_load_byte_positive_offset:
> cmpd r_HL, r_addr
> ble bpf_slow_path_byte
> lbzx r_A, r_D, r_addr
> @@ -69,22 +72,20 @@ sk_load_byte:
>
> /*
> * BPF_S_LDX_B_MSH: ldxb 4*([offset]&0xf)
> - * r_addr is the offset value, already known positive
> + * r_addr is the offset value
> */
> .globl sk_load_byte_msh
> sk_load_byte_msh:
> + cmpdi r_addr, 0
> + blt bpf_slow_path_byte_msh_neg
> + .globl sk_load_byte_msh_positive_offset
> +sk_load_byte_msh_positive_offset:
> cmpd r_HL, r_addr
> ble bpf_slow_path_byte_msh
> lbzx r_X, r_D, r_addr
> rlwinm r_X, r_X, 2, 32-4-2, 31-2
> blr
>
> -bpf_error:
> - /* Entered with cr0 = lt */
> - li r3, 0
> - /* Generated code will 'blt epilogue', returning 0. */
> - blr
> -
> /* Call out to skb_copy_bits:
> * We'll need to back up our volatile regs first; we have
> * local variable space at r1+(BPF_PPC_STACK_BASIC).
> @@ -136,3 +137,85 @@ bpf_slow_path_byte_msh:
> lbz r_X, BPF_PPC_STACK_BASIC+(2*8)(r1)
> rlwinm r_X, r_X, 2, 32-4-2, 31-2
> blr
> +
> +/* Call out to bpf_internal_load_pointer_neg_helper:
> + * We'll need to back up our volatile regs first; we have
> + * local variable space at r1+(BPF_PPC_STACK_BASIC).
> + * Allocate a new stack frame here to remain ABI-compliant in
> + * stashing LR.
> + */
> +#define sk_negative_common(SIZE) \
> + mflr r0; \
> + std r0, 16(r1); \
> + /* R3 goes in parameter space of caller's frame */ \
> + std r_skb, (BPF_PPC_STACKFRAME+48)(r1); \
> + std r_A, (BPF_PPC_STACK_BASIC+(0*8))(r1); \
> + std r_X, (BPF_PPC_STACK_BASIC+(1*8))(r1); \
> + stdu r1, -BPF_PPC_SLOWPATH_FRAME(r1); \
> + /* R3 = r_skb, as passed */ \
> + mr r4, r_addr; \
> + li r5, SIZE; \
> + bl bpf_internal_load_pointer_neg_helper; \
> + /* R3 != 0 on success */ \
> + addi r1, r1, BPF_PPC_SLOWPATH_FRAME; \
> + ld r0, 16(r1); \
> + ld r_A, (BPF_PPC_STACK_BASIC+(0*8))(r1); \
> + ld r_X, (BPF_PPC_STACK_BASIC+(1*8))(r1); \
> + mtlr r0; \
> + cmpldi r3, 0; \
> + beq bpf_error_slow; /* cr0 = EQ */ \
> + mr r_addr, r3; \
> + ld r_skb, (BPF_PPC_STACKFRAME+48)(r1); \
> + /* Great success! */
> +
> +bpf_slow_path_word_neg:
> + lis r_scratch1,-32 /* SKF_LL_OFF */
> + cmpd r_addr, r_scratch1 /* addr < SKF_* */
> + blt bpf_error /* cr0 = LT */
> + .globl sk_load_word_negative_offset
> +sk_load_word_negative_offset:
> + sk_negative_common(4)
> + lwz r_A, 0(r_addr)
> + blr
> +
> +bpf_slow_path_half_neg:
> + lis r_scratch1,-32 /* SKF_LL_OFF */
> + cmpd r_addr, r_scratch1 /* addr < SKF_* */
> + blt bpf_error /* cr0 = LT */
> + .globl sk_load_half_negative_offset
> +sk_load_half_negative_offset:
> + sk_negative_common(2)
> + lhz r_A, 0(r_addr)
> + blr
> +
> +bpf_slow_path_byte_neg:
> + lis r_scratch1,-32 /* SKF_LL_OFF */
> + cmpd r_addr, r_scratch1 /* addr < SKF_* */
> + blt bpf_error /* cr0 = LT */
> + .globl sk_load_byte_negative_offset
> +sk_load_byte_negative_offset:
> + sk_negative_common(1)
> + lbz r_A, 0(r_addr)
> + blr
> +
> +bpf_slow_path_byte_msh_neg:
> + lis r_scratch1,-32 /* SKF_LL_OFF */
> + cmpd r_addr, r_scratch1 /* addr < SKF_* */
> + blt bpf_error /* cr0 = LT */
> + .globl sk_load_byte_msh_negative_offset
> +sk_load_byte_msh_negative_offset:
> + sk_negative_common(1)
> + lbz r_X, 0(r_addr)
> + rlwinm r_X, r_X, 2, 32-4-2, 31-2
> + blr
> +
> +bpf_error_slow:
> + /* fabricate a cr0 = lt */
> + li r_scratch1, -1
> + cmpdi r_scratch1, 0
> +bpf_error:
> + /* Entered with cr0 = lt */
> + li r3, 0
> + /* Generated code will 'blt epilogue', returning 0. */
> + blr
> +
> diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
> index 73619d3..2dc8b14 100644
> --- a/arch/powerpc/net/bpf_jit_comp.c
> +++ b/arch/powerpc/net/bpf_jit_comp.c
> @@ -127,6 +127,9 @@ static void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx)
> PPC_BLR();
> }
>
> +#define CHOOSE_LOAD_FUNC(K, func) \
> + ((int)K < 0 ? ((int)K >= SKF_LL_OFF ? func##_negative_offset : func) : func##_positive_offset)
> +
> /* Assemble the body code between the prologue & epilogue. */
> static int bpf_jit_build_body(struct sk_filter *fp, u32 *image,
> struct codegen_context *ctx,
> @@ -391,21 +394,16 @@ static int bpf_jit_build_body(struct sk_filter *fp, u32 *image,
>
> /*** Absolute loads from packet header/data ***/
> case BPF_S_LD_W_ABS:
> - func = sk_load_word;
> + func = CHOOSE_LOAD_FUNC(K, sk_load_word);
> goto common_load;
> case BPF_S_LD_H_ABS:
> - func = sk_load_half;
> + func = CHOOSE_LOAD_FUNC(K, sk_load_half);
> goto common_load;
> case BPF_S_LD_B_ABS:
> - func = sk_load_byte;
> + func = CHOOSE_LOAD_FUNC(K, sk_load_byte);
> common_load:
> - /*
> - * Load from [K]. Reference with the (negative)
> - * SKF_NET_OFF/SKF_LL_OFF offsets is unsupported.
> - */
> + /* Load from [K]. */
> ctx->seen |= SEEN_DATAREF;
> - if ((int)K < 0)
> - return -ENOTSUPP;
> PPC_LI64(r_scratch1, func);
> PPC_MTLR(r_scratch1);
> PPC_LI32(r_addr, K);
> @@ -429,7 +427,7 @@ static int bpf_jit_build_body(struct sk_filter *fp, u32 *image,
> common_load_ind:
> /*
> * Load from [X + K]. Negative offsets are tested for
> - * in the helper functions, and result in a 'ret 0'.
> + * in the helper functions.
> */
> ctx->seen |= SEEN_DATAREF | SEEN_XREG;
> PPC_LI64(r_scratch1, func);
> @@ -443,13 +441,7 @@ static int bpf_jit_build_body(struct sk_filter *fp, u32 *image,
> break;
>
> case BPF_S_LDX_B_MSH:
> - /*
> - * x86 version drops packet (RET 0) when K<0, whereas
> - * interpreter does allow K<0 (__load_pointer, special
> - * ancillary data). common_load returns ENOTSUPP if K<0,
> - * so we fall back to interpreter & filter works.
> - */
> - func = sk_load_byte_msh;
> + func = CHOOSE_LOAD_FUNC(K, sk_load_byte_msh);
> goto common_load;
> break;
>
> diff --git a/arch/x86/net/bpf_jit.S b/arch/x86/net/bpf_jit.S
> index 6687022..2897d7f 100644
> --- a/arch/x86/net/bpf_jit.S
> +++ b/arch/x86/net/bpf_jit.S
> @@ -18,17 +18,17 @@
> * r9d : hlen = skb->len - skb->data_len
> */
> #define SKBDATA %r8
> -
> -sk_load_word_ind:
> - .globl sk_load_word_ind
> -
> - add %ebx,%esi /* offset += X */
> -# test %esi,%esi /* if (offset < 0) goto bpf_error; */
> - js bpf_error
> +#define SKF_MAX_NEG_OFF $(-0x200000) /* SKF_LL_OFF from filter.h */
>
> sk_load_word:
> .globl sk_load_word
>
> + test %esi,%esi
> + js bpf_slow_path_word_neg
> +
> +sk_load_word_positive_offset:
> + .globl sk_load_word_positive_offset
> +
> mov %r9d,%eax # hlen
> sub %esi,%eax # hlen - offset
> cmp $3,%eax
> @@ -37,16 +37,15 @@ sk_load_word:
> bswap %eax /* ntohl() */
> ret
>
> -
> -sk_load_half_ind:
> - .globl sk_load_half_ind
> -
> - add %ebx,%esi /* offset += X */
> - js bpf_error
> -
> sk_load_half:
> .globl sk_load_half
>
> + test %esi,%esi
> + js bpf_slow_path_half_neg
> +
> +sk_load_half_positive_offset:
> + .globl sk_load_half_positive_offset
> +
> mov %r9d,%eax
> sub %esi,%eax # hlen - offset
> cmp $1,%eax
> @@ -55,14 +54,15 @@ sk_load_half:
> rol $8,%ax # ntohs()
> ret
>
> -sk_load_byte_ind:
> - .globl sk_load_byte_ind
> - add %ebx,%esi /* offset += X */
> - js bpf_error
> -
> sk_load_byte:
> .globl sk_load_byte
>
> + test %esi,%esi
> + js bpf_slow_path_byte_neg
> +
> +sk_load_byte_positive_offset:
> + .globl sk_load_byte_positive_offset
> +
> cmp %esi,%r9d /* if (offset >= hlen) goto bpf_slow_path_byte */
> jle bpf_slow_path_byte
> movzbl (SKBDATA,%rsi),%eax
> @@ -73,25 +73,21 @@ sk_load_byte:
> *
> * Implements BPF_S_LDX_B_MSH : ldxb 4*([offset]&0xf)
> * Must preserve A accumulator (%eax)
> - * Inputs : %esi is the offset value, already known positive
> + * Inputs : %esi is the offset value
> */
> -ENTRY(sk_load_byte_msh)
> - CFI_STARTPROC
> +sk_load_byte_msh:
> + .globl sk_load_byte_msh
> + test %esi,%esi
> + js bpf_slow_path_byte_msh_neg
> +
> +sk_load_byte_msh_positive_offset:
> + .globl sk_load_byte_msh_positive_offset
> cmp %esi,%r9d /* if (offset >= hlen) goto bpf_slow_path_byte_msh */
> jle bpf_slow_path_byte_msh
> movzbl (SKBDATA,%rsi),%ebx
> and $15,%bl
> shl $2,%bl
> ret
> - CFI_ENDPROC
> -ENDPROC(sk_load_byte_msh)
> -
> -bpf_error:
> -# force a return 0 from jit handler
> - xor %eax,%eax
> - mov -8(%rbp),%rbx
> - leaveq
> - ret
>
> /* rsi contains offset and can be scratched */
> #define bpf_slow_path_common(LEN) \
> @@ -138,3 +134,68 @@ bpf_slow_path_byte_msh:
> shl $2,%al
> xchg %eax,%ebx
> ret
> +
> +#define sk_negative_common(SIZE) \
> + push %rdi; /* save skb */ \
> + push %r9; \
> + push SKBDATA; \
> +/* rsi already has offset */ \
> + mov $SIZE,%ecx; /* size */ \
> + call bpf_internal_load_pointer_neg_helper; \
> + test %rax,%rax; \
> + pop SKBDATA; \
> + pop %r9; \
> + pop %rdi; \
> + jz bpf_error
> +
> +
> +bpf_slow_path_word_neg:
> + cmp SKF_MAX_NEG_OFF, %esi /* test range */
> + jl bpf_error /* offset lower -> error */
> +sk_load_word_negative_offset:
> + .globl sk_load_word_negative_offset
> + sk_negative_common(4)
> + mov (%rax), %eax
> + bswap %eax
> + ret
> +
> +bpf_slow_path_half_neg:
> + cmp SKF_MAX_NEG_OFF, %esi
> + jl bpf_error
> +sk_load_half_negative_offset:
> + .globl sk_load_half_negative_offset
> + sk_negative_common(2)
> + mov (%rax),%ax
> + rol $8,%ax
> + movzwl %ax,%eax
> + ret
> +
> +bpf_slow_path_byte_neg:
> + cmp SKF_MAX_NEG_OFF, %esi
> + jl bpf_error
> +sk_load_byte_negative_offset:
> + .globl sk_load_byte_negative_offset
> + sk_negative_common(1)
> + movzbl (%rax), %eax
> + ret
> +
> +bpf_slow_path_byte_msh_neg:
> + cmp SKF_MAX_NEG_OFF, %esi
> + jl bpf_error
> +sk_load_byte_msh_negative_offset:
> + .globl sk_load_byte_msh_negative_offset
> + xchg %eax,%ebx /* dont lose A , X is about to be scratched */
> + sk_negative_common(1)
> + movzbl (%rax),%eax
> + and $15,%al
> + shl $2,%al
> + xchg %eax,%ebx
> + ret
> +
> +bpf_error:
> +# force a return 0 from jit handler
> + xor %eax,%eax
> + mov -8(%rbp),%rbx
> + leaveq
> + ret
> +
> diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
> index 5671752..39a2e2c 100644
> --- a/arch/x86/net/bpf_jit_comp.c
> +++ b/arch/x86/net/bpf_jit_comp.c
> @@ -30,7 +30,10 @@ int bpf_jit_enable __read_mostly;
> * assembly code in arch/x86/net/bpf_jit.S
> */
> extern u8 sk_load_word[], sk_load_half[], sk_load_byte[], sk_load_byte_msh[];
> -extern u8 sk_load_word_ind[], sk_load_half_ind[], sk_load_byte_ind[];
> +extern u8 sk_load_word_positive_offset[], sk_load_half_positive_offset[];
> +extern u8 sk_load_byte_positive_offset[], sk_load_byte_msh_positive_offset[];
> +extern u8 sk_load_word_negative_offset[], sk_load_half_negative_offset[];
> +extern u8 sk_load_byte_negative_offset[], sk_load_byte_msh_negative_offset[];
>
> static inline u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len)
> {
> @@ -117,6 +120,8 @@ static inline void bpf_flush_icache(void *start, void *end)
> set_fs(old_fs);
> }
>
> +#define CHOOSE_LOAD_FUNC(K, func) \
> + ((int)K < 0 ? ((int)K >= SKF_LL_OFF ? func##_negative_offset : func) : func##_positive_offset)
>
> void bpf_jit_compile(struct sk_filter *fp)
> {
> @@ -473,44 +478,46 @@ void bpf_jit_compile(struct sk_filter *fp)
> #endif
> break;
> case BPF_S_LD_W_ABS:
> - func = sk_load_word;
> + func = CHOOSE_LOAD_FUNC(K, sk_load_word);
> common_load: seen |= SEEN_DATAREF;
> - if ((int)K < 0) {
> - /* Abort the JIT because __load_pointer() is needed. */
> - goto out;
> - }
> t_offset = func - (image + addrs[i]);
> EMIT1_off32(0xbe, K); /* mov imm32,%esi */
> EMIT1_off32(0xe8, t_offset); /* call */
> break;
> case BPF_S_LD_H_ABS:
> - func = sk_load_half;
> + func = CHOOSE_LOAD_FUNC(K, sk_load_half);
> goto common_load;
> case BPF_S_LD_B_ABS:
> - func = sk_load_byte;
> + func = CHOOSE_LOAD_FUNC(K, sk_load_byte);
> goto common_load;
> case BPF_S_LDX_B_MSH:
> - if ((int)K < 0) {
> - /* Abort the JIT because __load_pointer() is needed. */
> - goto out;
> - }
> + func = CHOOSE_LOAD_FUNC(K, sk_load_byte_msh);
> seen |= SEEN_DATAREF | SEEN_XREG;
> - t_offset = sk_load_byte_msh - (image + addrs[i]);
> + t_offset = func - (image + addrs[i]);
> EMIT1_off32(0xbe, K); /* mov imm32,%esi */
> EMIT1_off32(0xe8, t_offset); /* call sk_load_byte_msh */
> break;
> case BPF_S_LD_W_IND:
> - func = sk_load_word_ind;
> + func = sk_load_word;
> common_load_ind: seen |= SEEN_DATAREF | SEEN_XREG;
> t_offset = func - (image + addrs[i]);
> - EMIT1_off32(0xbe, K); /* mov imm32,%esi */
> + if (K) {
> + if (is_imm8(K)) {
> + EMIT3(0x8d, 0x73, K); /* lea imm8(%rbx), %esi */
> + } else {
> + EMIT2(0x8d, 0xb3); /* lea imm32(%rbx),%esi */
> + EMIT(K, 4);
> + }
> + } else {
> + EMIT2(0x89,0xde); /* mov %ebx,%esi */
> + }
> EMIT1_off32(0xe8, t_offset); /* call sk_load_xxx_ind */
> break;
> case BPF_S_LD_H_IND:
> - func = sk_load_half_ind;
> + func = sk_load_half;
> goto common_load_ind;
> case BPF_S_LD_B_IND:
> - func = sk_load_byte_ind;
> + func = sk_load_byte;
> goto common_load_ind;
> case BPF_S_JMP_JA:
> t_offset = addrs[i + K] - addrs[i];
> diff --git a/net/core/filter.c b/net/core/filter.c
> index 5dea452..04ca613 100644
> --- a/net/core/filter.c
> +++ b/net/core/filter.c
> @@ -41,7 +41,7 @@
> #include <linux/ratelimit.h>
>
> /* No hurry in this branch */
> -static void *__load_pointer(const struct sk_buff *skb, int k, unsigned int size)
> +void *bpf_internal_load_pointer_neg_helper(const struct sk_buff *skb, int k, unsigned int size)
> {
> u8 *ptr = NULL;
>
> @@ -54,13 +54,14 @@ static void *__load_pointer(const struct sk_buff *skb, int k, unsigned int size)
> return ptr;
> return NULL;
> }
> +EXPORT_SYMBOL(bpf_internal_load_pointer_neg_helper);
>
> static inline void *load_pointer(const struct sk_buff *skb, int k,
> unsigned int size, void *buffer)
> {
> if (k >= 0)
> return skb_header_pointer(skb, k, size, buffer);
> - return __load_pointer(skb, k, size);
> + return bpf_internal_load_pointer_neg_helper(skb, k, size);
> }
>
> /**
>
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [REGRESSION][PATCH] bpf_jit drops the ball on indirect negative mem references
2012-03-30 14:08 ` Eric Dumazet
2012-03-30 14:11 ` Josh Boyer
@ 2012-03-30 15:24 ` Matt Evans
1 sibling, 0 replies; 14+ messages in thread
From: Matt Evans @ 2012-03-30 15:24 UTC (permalink / raw)
To: Eric Dumazet; +Cc: Jan Seiffert, netdev, linux-kernel, David S. Miller
Howdy Eric,
On 30 Mar 2012, at 15:08, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> On Fri, 2012-03-30 at 15:42 +0200, Jan Seiffert wrote:
>
>> Do you know where i can ping the powerpc guys a little bit harder?
>
> maybe using CC linux-arch@vger.kernel.org
>
> One way to handle that would be to split this patch in 3 parts, so that
> I can Ack the non ppc part.
>
> [1] introduce bpf_internal_load_pointer_neg_helper()
> [BTW no need to EXPORT_SYMBOL it, since jit is not a module ]
>
> [2] x86_bpf_jit: handle negative offsets in loads
>
> [3] ppc_bpf_jit: handle negative offsets in loads
Yeah, that'd be great, then I can stew on it without holding you up further.
Cheers,
Matt
>
>
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [REGRESSION][PATCH] bpf_jit drops the ball on indirect negative mem references
2012-03-30 15:19 ` Matt Evans
@ 2012-03-30 15:51 ` Jan Seiffert
0 siblings, 0 replies; 14+ messages in thread
From: Jan Seiffert @ 2012-03-30 15:51 UTC (permalink / raw)
To: Matt Evans
Cc: <netdev@vger.kernel.org>,
Eric Dumazet, <linux-kernel@vger.kernel.org>
Matt Evans schrieb:
> Hi Jan,
>
> On 30 Mar 2012, at 14:42, Jan Seiffert <kaffeemonster@googlemail.com> wrote:
>
[snip]
>> Do you know where i can ping the powerpc guys a little bit harder?
>
> No need (unless you mean a different guy), I have enough guilt as it
> is! :) (Sorry for no response, am moving house and am netless.)
Oh, ok, didn't know.
> I skimmed your patches on my phone but hope to be in a state to
> review/test over the weekend or early next week. :)
It would be great if you could review it.
With pinging harder i meant to get anyone fluent in powerpc asm to look
at the changes i made in the asm. I think the stack setup I've done is wrong.
> Cheers,
>
> Matt
>
Greetings
Jan
>
[snip]
--
Real programmer are surprised when their
odometer does not flip over from 000009
to 00000A.
^ permalink raw reply [flat|nested] 14+ messages in thread
end of thread, other threads:[~2012-03-30 16:54 UTC | newest]
Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-03-28 19:15 [REGRESSION][PATCH] bpf_jit drops the ball on indirect negative mem references Jan Seiffert
2012-03-28 20:05 ` Eric Dumazet
2012-03-28 20:26 ` Jan Seiffert
2012-03-28 20:39 ` Eric Dumazet
2012-03-29 11:54 ` Jan Seiffert
2012-03-29 13:57 ` Eric Dumazet
2012-03-30 9:11 ` Eric Dumazet
2012-03-30 13:42 ` Jan Seiffert
2012-03-30 14:08 ` Eric Dumazet
2012-03-30 14:11 ` Josh Boyer
2012-03-30 15:24 ` Matt Evans
2012-03-30 15:19 ` Matt Evans
2012-03-30 15:51 ` Jan Seiffert
2012-03-28 20:58 ` David Miller
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.