* [REGRESSION][PATCH] bpf_jit drops the ball on indirect negative mem references
@ 2012-03-28 19:15 Jan Seiffert
  2012-03-28 20:05 ` Eric Dumazet
  2012-03-28 20:58 ` David Miller
  0 siblings, 2 replies; 14+ messages in thread
From: Jan Seiffert @ 2012-03-28 19:15 UTC (permalink / raw)
  To: netdev; +Cc: linux-kernel, David S. Miller, Matt Evans, Eric Dumazet

[-- Attachment #1: Type: text/plain, Size: 2386 bytes --]

Consider the following test program:

#include <stdio.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <pcap-bpf.h>

#define die(x) do {perror(x); return 1;} while (0)
struct bpf_insn udp_filter[] = {
	/*   0 */ BPF_STMT(BPF_LDX|BPF_W|BPF_IMM, -1048576+(0)), /* leax	net[0] */
	/*   1 */ BPF_STMT(BPF_LD|BPF_B|BPF_IND, 0),             /* ldb	[x+0] */
	/*   2 */ BPF_STMT(BPF_RET|BPF_A, 0),                    /* ret	a */
};

int main(int argc, char *argv[])
{
	char buf[512];
	struct sockaddr_in addr;
	struct bpf_program prg;
	socklen_t addr_s;
	ssize_t res;
	int fd;

	addr.sin_family = AF_INET;
	addr.sin_port = htons(5000);
	addr.sin_addr.s_addr = 0;
	addr_s = sizeof(addr);
	prg.bf_len = sizeof(udp_filter)/sizeof(udp_filter[0]);
	prg.bf_insns = udp_filter;
	if(-1 == (fd = socket(AF_INET, SOCK_DGRAM, 0)))
		die("socket");
	if(-1 == bind(fd, (struct sockaddr *)&addr, sizeof(addr)))
		die("bind");
	if(-1 == setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &prg, sizeof(prg)))
		die("setsockopt");
	res = recvfrom(fd, buf, sizeof(buf), 0, (struct sockaddr *)&addr, &addr_s);
	if(res != -1)
		printf("packet received: %zi bytes\n", res);
	else
		die("recvfrom");
	return 0;
}

When run with the BPF JIT disabled, it works:
console 1 $ ./bpf
console 2 $ echo "hello" | nc -u localhost 5000
console 1: packet received: 6 bytes

When the BPF JIT gets enabled (echo 100 >
/proc/sys/net/core/bpf_jit_enable), the same program stops working:
console 1 $ ./bpf
console 2 $ echo "hello" | nc -u localhost 5000
console 1:

The reason is that neither JIT (x86 nor powerpc) handles negative
memory references such as SKF_NET_OFF or SKF_LL_OFF; only the simple
ancillary data references are supported (by mapping them to special
instructions).
For an absolute load the JIT aborts the translation when it sees a
negative reference, and a negative k on an indirect load also aborts
the translation. But if X is negative to begin with, only the error
handler is reached at runtime, which drops the whole packet.
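
For reference, these are the special offset values from include/linux/filter.h
that the test program and the JITs are dealing with (quoted from memory, so
please double-check against the header):

#define SKF_AD_OFF	(-0x1000)	/* ancillary data; the JITs map these
					 * to special instructions */
#define SKF_NET_OFF	(-0x100000)	/* base of the network header, i.e. the
					 * -1048576 used in the filter above */
#define SKF_LL_OFF	(-0x200000)	/* base of the link layer header */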

I propose the following patch to fix this situation.
Lightly tested on x86, but the powerpc asm part is probably wrong.

Signed-off-by: Jan Seiffert <kaffeemonster@googlemail.com>

-- 
I know this is not a 100% submission, so please refrain from shooting me;
I'm not really set up for professional kernel development, I just want
this fixed.

Greetings
Jan

[-- Attachment #2: bpf_neg.diff --]
[-- Type: application/octet-stream, Size: 10559 bytes --]

diff --git a/arch/powerpc/net/bpf_jit_64.S b/arch/powerpc/net/bpf_jit_64.S
index ff4506e..87dc6e4 100644
--- a/arch/powerpc/net/bpf_jit_64.S
+++ b/arch/powerpc/net/bpf_jit_64.S
@@ -31,14 +31,11 @@
  * then branch directly to slow_path_XXX if required.  (In fact, could
  * load a spare GPR with the address of slow_path_generic and pass size
  * as an argument, making the call site a mtlr, li and bllr.)
- *
- * Technically, the "is addr < 0" check is unnecessary & slowing down
- * the ABS path, as it's statically checked on generation.
  */
 	.globl	sk_load_word
 sk_load_word:
 	cmpdi	r_addr, 0
-	blt	bpf_error
+	blt	bpf_slow_path_neg_word
 	/* Are we accessing past headlen? */
 	subi	r_scratch1, r_HL, 4
 	cmpd	r_scratch1, r_addr
@@ -51,7 +48,7 @@ sk_load_word:
 	.globl	sk_load_half
 sk_load_half:
 	cmpdi	r_addr, 0
-	blt	bpf_error
+	blt	bpf_slow_path_neg_half
 	subi	r_scratch1, r_HL, 2
 	cmpd	r_scratch1, r_addr
 	blt	bpf_slow_path_half
@@ -61,7 +58,7 @@ sk_load_half:
 	.globl	sk_load_byte
 sk_load_byte:
 	cmpdi	r_addr, 0
-	blt	bpf_error
+	blt	bpf_slow_path_neg_byte
 	cmpd	r_HL, r_addr
 	ble	bpf_slow_path_byte
 	lbzx	r_A, r_D, r_addr
@@ -69,16 +66,22 @@ sk_load_byte:
 
 /*
  * BPF_S_LDX_B_MSH: ldxb  4*([offset]&0xf)
- * r_addr is the offset value, already known positive
+ * r_addr is the offset value
  */
 	.globl sk_load_byte_msh
 sk_load_byte_msh:
+	cmpdi	r_addr, 0
+	blt	bpf_slow_path_neg_byte_msh
 	cmpd	r_HL, r_addr
 	ble	bpf_slow_path_byte_msh
 	lbzx	r_X, r_D, r_addr
 	rlwinm	r_X, r_X, 2, 32-4-2, 31-2
 	blr
 
+bpf_error_slow:
+	/* fabricate a cr0 = lt */
+	li	r_scratch1, -1
+	cmpdi	r_scratch1, 0
 bpf_error:
 	/* Entered with cr0 = lt */
 	li	r3, 0
@@ -136,3 +139,57 @@ bpf_slow_path_byte_msh:
 	lbz	r_X, BPF_PPC_STACK_BASIC+(2*8)(r1)
 	rlwinm	r_X, r_X, 2, 32-4-2, 31-2
 	blr
+
+/* Call out to bpf_internal_load_pointer_neg_helper:
+ * We'll need to back up our volatile regs first; we have
+ * local variable space at r1+(BPF_PPC_STACK_BASIC).
+ * Allocate a new stack frame here to remain ABI-compliant in
+ * stashing LR.
+ */
+#define bpf_slow_path_neg_common(SIZE)				\
+	lis     r_scratch1,-32; /* SKF_LL_OFF */		\
+	cmpd	r_addr, r_scratch1; /* addr < SKF_* */		\
+	blt	bpf_error; /* cr0 = LT */			\
+	mflr	r0;						\
+	std	r0, 16(r1);					\
+	/* R3 goes in parameter space of caller's frame */	\
+	std	r_skb, (BPF_PPC_STACKFRAME+48)(r1);		\
+	std	r_A, (BPF_PPC_STACK_BASIC+(0*8))(r1);		\
+	std	r_X, (BPF_PPC_STACK_BASIC+(1*8))(r1);		\
+	stdu	r1, -BPF_PPC_SLOWPATH_FRAME(r1);		\
+	/* R3 = r_skb, as passed */				\
+	mr	r4, r_addr;					\
+	li	r5, SIZE;					\
+	bl	bpf_internal_load_pointer_neg_helper;		\
+	/* R3 != 0 on success */				\
+	addi	r1, r1, BPF_PPC_SLOWPATH_FRAME;			\
+	ld	r0, 16(r1);					\
+	ld	r_A, (BPF_PPC_STACK_BASIC+(0*8))(r1);		\
+	ld	r_X, (BPF_PPC_STACK_BASIC+(1*8))(r1);		\
+	mtlr	r0;						\
+	cmpldi	r3, 0;						\
+	beq	bpf_error_slow;	/* cr0 = EQ */			\
+	mr	r_addr, r3;					\
+	ld	r_skb, (BPF_PPC_STACKFRAME+48)(r1);		\
+	/* Great success! */
+
+bpf_slow_path_neg_word:
+	bpf_slow_path_neg_common(4)
+	lwz	r_A, (r_addr)
+	blr
+
+bpf_slow_path_neg_half:
+	bpf_slow_path_neg_common(2)
+	lhz	r_A, (r_addr)
+	blr
+
+bpf_slow_path_neg_byte:
+	bpf_slow_path_neg_common(1)
+	lbz	r_A, (r_addr)
+	blr
+
+bpf_slow_path_neg_byte_msh:
+	bpf_slow_path_neg_common(1)
+	lbz	r_X, (r_addr)
+	rlwinm	r_X, r_X, 2, 32-4-2, 31-2
+	blr
diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
index 73619d3..0019f70 100644
--- a/arch/powerpc/net/bpf_jit_comp.c
+++ b/arch/powerpc/net/bpf_jit_comp.c
@@ -400,12 +400,10 @@ static int bpf_jit_build_body(struct sk_filter *fp, u32 *image,
 			func = sk_load_byte;
 		common_load:
 			/*
-			 * Load from [K].  Reference with the (negative)
-			 * SKF_NET_OFF/SKF_LL_OFF offsets is unsupported.
+			 * Load from [K]. Negative offsets are tested for
+			 * in the helper functions.
 			 */
 			ctx->seen |= SEEN_DATAREF;
-			if ((int)K < 0)
-				return -ENOTSUPP;
 			PPC_LI64(r_scratch1, func);
 			PPC_MTLR(r_scratch1);
 			PPC_LI32(r_addr, K);
@@ -429,7 +427,7 @@ static int bpf_jit_build_body(struct sk_filter *fp, u32 *image,
 		common_load_ind:
 			/*
 			 * Load from [X + K].  Negative offsets are tested for
-			 * in the helper functions, and result in a 'ret 0'.
+			 * in the helper functions.
 			 */
 			ctx->seen |= SEEN_DATAREF | SEEN_XREG;
 			PPC_LI64(r_scratch1, func);
@@ -443,12 +441,6 @@ static int bpf_jit_build_body(struct sk_filter *fp, u32 *image,
 			break;
 
 		case BPF_S_LDX_B_MSH:
-			/*
-			 * x86 version drops packet (RET 0) when K<0, whereas
-			 * interpreter does allow K<0 (__load_pointer, special
-			 * ancillary data).  common_load returns ENOTSUPP if K<0,
-			 * so we fall back to interpreter & filter works.
-			 */
 			func = sk_load_byte_msh;
 			goto common_load;
 			break;
diff --git a/arch/x86/net/bpf_jit.S b/arch/x86/net/bpf_jit.S
index 6687022..1201783 100644
--- a/arch/x86/net/bpf_jit.S
+++ b/arch/x86/net/bpf_jit.S
@@ -18,17 +18,13 @@
  * r9d : hlen = skb->len - skb->data_len
  */
 #define SKBDATA	%r8
-
-sk_load_word_ind:
-	.globl	sk_load_word_ind
-
-	add	%ebx,%esi	/* offset += X */
-#	test    %esi,%esi	/* if (offset < 0) goto bpf_error; */
-	js	bpf_error
+#define SKF_MAX_NEG_OFF    $(-0x200000) /* SKF_LL_OFF from filter.h */
 
 sk_load_word:
 	.globl	sk_load_word
 
+	test	%esi,%esi
+	js	bpf_slow_path_word_neg
 	mov	%r9d,%eax		# hlen
 	sub	%esi,%eax		# hlen - offset
 	cmp	$3,%eax
@@ -37,16 +33,11 @@ sk_load_word:
 	bswap   %eax  			/* ntohl() */
 	ret
 
-
-sk_load_half_ind:
-	.globl sk_load_half_ind
-
-	add	%ebx,%esi	/* offset += X */
-	js	bpf_error
-
 sk_load_half:
 	.globl	sk_load_half
 
+	test	%esi,%esi
+	js	bpf_slow_path_half_neg
 	mov	%r9d,%eax
 	sub	%esi,%eax		#	hlen - offset
 	cmp	$1,%eax
@@ -55,14 +46,11 @@ sk_load_half:
 	rol	$8,%ax			# ntohs()
 	ret
 
-sk_load_byte_ind:
-	.globl sk_load_byte_ind
-	add	%ebx,%esi	/* offset += X */
-	js	bpf_error
-
 sk_load_byte:
 	.globl	sk_load_byte
 
+	test	%esi,%esi
+	js	bpf_slow_path_byte_neg
 	cmp	%esi,%r9d   /* if (offset >= hlen) goto bpf_slow_path_byte */
 	jle	bpf_slow_path_byte
 	movzbl	(SKBDATA,%rsi),%eax
@@ -73,10 +61,12 @@ sk_load_byte:
  *
  * Implements BPF_S_LDX_B_MSH : ldxb  4*([offset]&0xf)
  * Must preserve A accumulator (%eax)
- * Inputs : %esi is the offset value, already known positive
+ * Inputs : %esi is the offset value
  */
 ENTRY(sk_load_byte_msh)
 	CFI_STARTPROC
+	test	%esi,%esi
+	js	bpf_slow_path_byte_msh_neg
 	cmp	%esi,%r9d      /* if (offset >= hlen) goto bpf_slow_path_byte_msh */
 	jle	bpf_slow_path_byte_msh
 	movzbl	(SKBDATA,%rsi),%ebx
@@ -138,3 +128,45 @@ bpf_slow_path_byte_msh:
 	shl	$2,%al
 	xchg	%eax,%ebx
 	ret
+
+#define bpf_slow_path_neg_common(SIZE)				\
+	cmp	SKF_MAX_NEG_OFF, %esi;	/* test range */	\
+	jl	bpf_error;	/* offset lower -> error  */	\
+	push	%rdi;	/* save skb */				\
+	push	%r9;						\
+	push	SKBDATA;					\
+/* rsi already has offset */					\
+	mov	$SIZE,%ecx;	/* size */			\
+	call	bpf_internal_load_pointer_neg_helper;		\
+	test	%rax,%rax;					\
+	pop	SKBDATA;					\
+	pop	%r9;						\
+	pop	%rdi;						\
+	jz	bpf_error
+
+bpf_slow_path_word_neg:
+	bpf_slow_path_neg_common(4)
+	mov	(%rax), %eax
+	bswap	%eax
+	ret
+
+bpf_slow_path_half_neg:
+	bpf_slow_path_neg_common(2)
+	mov	(%rax),%ax
+	rol	$8,%ax
+	movzwl	%ax,%eax
+	ret
+
+bpf_slow_path_byte_neg:
+	bpf_slow_path_neg_common(1)
+	movzbl	(%rax), %eax
+	ret
+
+bpf_slow_path_byte_msh_neg:
+	xchg	%eax,%ebx /* dont lose A , X is about to be scratched */
+	bpf_slow_path_neg_common(1)
+	movzbl	(%rax),%eax
+	and	$15,%al
+	shl	$2,%al
+	xchg	%eax,%ebx
+	ret
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 5671752..3c4a454 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -30,7 +30,6 @@ int bpf_jit_enable __read_mostly;
  * assembly code in arch/x86/net/bpf_jit.S
  */
 extern u8 sk_load_word[], sk_load_half[], sk_load_byte[], sk_load_byte_msh[];
-extern u8 sk_load_word_ind[], sk_load_half_ind[], sk_load_byte_ind[];
 
 static inline u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len)
 {
@@ -475,10 +474,6 @@ void bpf_jit_compile(struct sk_filter *fp)
 			case BPF_S_LD_W_ABS:
 				func = sk_load_word;
 common_load:			seen |= SEEN_DATAREF;
-				if ((int)K < 0) {
-					/* Abort the JIT because __load_pointer() is needed. */
-					goto out;
-				}
 				t_offset = func - (image + addrs[i]);
 				EMIT1_off32(0xbe, K); /* mov imm32,%esi */
 				EMIT1_off32(0xe8, t_offset); /* call */
@@ -490,27 +485,28 @@ common_load:			seen |= SEEN_DATAREF;
 				func = sk_load_byte;
 				goto common_load;
 			case BPF_S_LDX_B_MSH:
-				if ((int)K < 0) {
-					/* Abort the JIT because __load_pointer() is needed. */
-					goto out;
-				}
 				seen |= SEEN_DATAREF | SEEN_XREG;
 				t_offset = sk_load_byte_msh - (image + addrs[i]);
 				EMIT1_off32(0xbe, K);	/* mov imm32,%esi */
 				EMIT1_off32(0xe8, t_offset); /* call sk_load_byte_msh */
 				break;
 			case BPF_S_LD_W_IND:
-				func = sk_load_word_ind;
+				func = sk_load_word;
 common_load_ind:		seen |= SEEN_DATAREF | SEEN_XREG;
 				t_offset = func - (image + addrs[i]);
-				EMIT1_off32(0xbe, K);	/* mov imm32,%esi   */
+				if (K) {
+					EMIT2(0x8d, 0xb3); /* lea imm32(%rbx),%esi */
+					EMIT(K, 4);
+				} else {
+					EMIT2(0x89,0xde); /* mov %ebx,%esi */
+				}
 				EMIT1_off32(0xe8, t_offset);	/* call sk_load_xxx_ind */
 				break;
 			case BPF_S_LD_H_IND:
-				func = sk_load_half_ind;
+				func = sk_load_half;
 				goto common_load_ind;
 			case BPF_S_LD_B_IND:
-				func = sk_load_byte_ind;
+				func = sk_load_byte;
 				goto common_load_ind;
 			case BPF_S_JMP_JA:
 				t_offset = addrs[i + K] - addrs[i];
diff --git a/net/core/filter.c b/net/core/filter.c
index 5dea452..04ca613 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -41,7 +41,7 @@
 #include <linux/ratelimit.h>
 
 /* No hurry in this branch */
-static void *__load_pointer(const struct sk_buff *skb, int k, unsigned int size)
+void *bpf_internal_load_pointer_neg_helper(const struct sk_buff *skb, int k, unsigned int size)
 {
 	u8 *ptr = NULL;
 
@@ -54,13 +54,14 @@ static void *__load_pointer(const struct sk_buff *skb, int k, unsigned int size)
 		return ptr;
 	return NULL;
 }
+EXPORT_SYMBOL(bpf_internal_load_pointer_neg_helper);
 
 static inline void *load_pointer(const struct sk_buff *skb, int k,
 				 unsigned int size, void *buffer)
 {
 	if (k >= 0)
 		return skb_header_pointer(skb, k, size, buffer);
-	return __load_pointer(skb, k, size);
+	return bpf_internal_load_pointer_neg_helper(skb, k, size);
 }
 
 /**


* Re: [REGRESSION][PATCH] bpf_jit drops the ball on indirect negative mem references
  2012-03-28 19:15 [REGRESSION][PATCH] bpf_jit drops the ball on indirect negative mem references Jan Seiffert
@ 2012-03-28 20:05 ` Eric Dumazet
  2012-03-28 20:26   ` Jan Seiffert
  2012-03-28 20:58 ` David Miller
  1 sibling, 1 reply; 14+ messages in thread
From: Eric Dumazet @ 2012-03-28 20:05 UTC (permalink / raw)
  To: Jan Seiffert; +Cc: netdev, linux-kernel, David S. Miller, Matt Evans

On Wed, 2012-03-28 at 21:15 +0200, Jan Seiffert wrote:
> Consider the following test program:
> 
> #include <stdio.h>
> #include <sys/types.h>
> #include <sys/socket.h>
> #include <netinet/in.h>
> #include <pcap-bpf.h>
> 
> #define die(x) do {perror(x); return 1;} while (0)
> struct bpf_insn udp_filter[] = {
> 	/*   0 */ BPF_STMT(BPF_LDX|BPF_W|BPF_IMM, -1048576+(0)), /* leax	net[0] */
> 	/*   1 */ BPF_STMT(BPF_LD|BPF_B|BPF_IND, 0),             /* ldb	[x+0] */
> 	/*   2 */ BPF_STMT(BPF_RET|BPF_A, 0),                    /* ret	a */
> };

When this point was raised some weeks ago, we wanted to see a _real_ use
of negative mem references.

You provide a test program, but what exactly is this filter supposed to
do?





* Re: [REGRESSION][PATCH] bpf_jit drops the ball on indirect negative mem references
  2012-03-28 20:05 ` Eric Dumazet
@ 2012-03-28 20:26   ` Jan Seiffert
  2012-03-28 20:39     ` Eric Dumazet
  0 siblings, 1 reply; 14+ messages in thread
From: Jan Seiffert @ 2012-03-28 20:26 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: netdev, linux-kernel, David S. Miller, Matt Evans

2012/3/28 Eric Dumazet <eric.dumazet@gmail.com>:
> On Wed, 2012-03-28 at 21:15 +0200, Jan Seiffert wrote:
>> Consider the following test program:
>>
>> #include <stdio.h>
>> #include <sys/types.h>
>> #include <sys/socket.h>
>> #include <netinet/in.h>
>> #include <pcap-bpf.h>
>>
>> #define die(x) do {perror(x); return 1;} while (0)
>> struct bpf_insn udp_filter[] = {
>>       /*   0 */ BPF_STMT(BPF_LDX|BPF_W|BPF_IMM, -1048576+(0)), /* leax        net[0] */
>>       /*   1 */ BPF_STMT(BPF_LD|BPF_B|BPF_IND, 0),             /* ldb [x+0] */
>>       /*   2 */ BPF_STMT(BPF_RET|BPF_A, 0),                    /* ret a */
>> };
>
> When this point was raised some weeks ago, we wanted to see a _real_ use
> of negative mem reference.
>
> You provide a test program but what this filter is supposed to do
> exactly ?
>

Say you have a UDP socket, and you want to filter for bogus source
addresses (drop them already in the kernel to save the context switch).
To have only one BPF program for IPv4 and IPv6 (you have to check the
same bogus v4 addresses in IPv4-mapped space), there is a point where
it is elegant to have a negative offset saved in the X register.
This is how I found the bug.

But use case or not, the JIT's behavior differs from the interpreter's.

Example (not the complete program; the negative-offset trick is spelled
out after the listing):

struct bpf_insn UDPPValid[] = {
        /*   0 */ BPF_STMT(BPF_LDX|BPF_W|BPF_IMM, -1048576+(12)),
        /*   1 */ BPF_STMT(BPF_LD|BPF_B|BPF_ABS, -1048576+(0)),
        /*   2 */ BPF_STMT(BPF_ALU|BPF_AND|BPF_K, 0xf0),
        /*   3 */ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, 0x40, 22 - 4, 0),
        /*   4 */ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, 0x60, 5 - 5, 39 - 5),
        /*   5 */ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, 0, 12 - 6, 0),
        /*   6 */ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, 0x20010DB8, 39 - 7, 0),
        /*   7 */ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, 0x20010002, 18 - 8, 0),
        /*   8 */ BPF_STMT(BPF_ALU|BPF_AND|BPF_K, 0xfffffff0),
        /*   9 */ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, 0x20010010, 39 - 10, 0),
        /*  10 */ BPF_STMT(BPF_ALU|BPF_AND|BPF_K, 0xff000000),
        /*  11 */ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, 0xff000000, 39 - 12, 37 - 12),
        /*  12 */ BPF_STMT(BPF_LD|BPF_W|BPF_ABS, -1048576+(12)),
        /*  13 */ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, 0, 0, 37 - 14),
        /*  14 */ BPF_STMT(BPF_LD|BPF_W|BPF_ABS, -1048576+(16)),
        /*  15 */ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, 0xffff, 21 - 16, 0),
        /*  16 */ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, 0x0064FF9B, 21 - 17, 0),
        /*  17 */ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, 0, 39 - 18, 37 - 18),
        /*  18 */ BPF_STMT(BPF_LD|BPF_W|BPF_ABS, -1048576+(12)),
        /*  19 */ BPF_STMT(BPF_ALU|BPF_AND|BPF_K, 0xffff0000),
        /*  20 */ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, 0, 39 - 21, 37 - 21),
        /*  21 */ BPF_STMT(BPF_LDX|BPF_W|BPF_IMM, -1048576+(20)),
        /*  22 */ BPF_STMT(BPF_LD|BPF_W|BPF_IND, 0),
        /*  23 */ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, 0xffffffff, 39 - 24, 0),
        /*  24 */ BPF_STMT(BPF_ALU|BPF_AND|BPF_K, 0xffffff00),
        /*  25 */ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, 0xC0000000, 39 - 26, 0),
        /*  26 */ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, 0xC0000200, 39 - 27, 0),
        /*  27 */ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, 0xC6336400, 39 - 28, 0),
        /*  28 */ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, 0xCB007100, 39 - 29, 0),
        /*  29 */ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, 0xC0586300, 39 - 30, 0),
        /*  30 */ BPF_STMT(BPF_ALU|BPF_AND|BPF_K, 0xfffe0000),
        /*  31 */ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, 0xC6120000, 39 - 32, 0),
        /*  32 */ BPF_STMT(BPF_ALU|BPF_AND|BPF_K, 0xff000000),
        /*  33 */ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, 0, 39 - 34, 0),
        /*  34 */ BPF_STMT(BPF_ALU|BPF_AND|BPF_K, 0xf0000000),
        /*  35 */ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, 0xE0000000, 39 - 36, 0),
        /*  36 */ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, 0xF0000000, 39 - 37, 0),
        /*  37 */ BPF_STMT(BPF_LD|BPF_W|BPF_LEN, 0),
        /*  38 */ BPF_STMT(BPF_RET|BPF_A, 0),
        /*  39 */ BPF_STMT(BPF_RET|BPF_K, 0),
};
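
To spell out the negative-offset trick (my reading of the key instructions
above, as an annotated excerpt rather than new code):

        /*   0 */ BPF_STMT(BPF_LDX|BPF_W|BPF_IMM, -1048576+(12)),
                  /* X = net[12]: the IPv4 source address */
        /*   3 */ /* "version == 4?" -> jump to 22, X still points at net[12] */
        /*  21 */ BPF_STMT(BPF_LDX|BPF_W|BPF_IMM, -1048576+(20)),
                  /* IPv6 path: X = net[20], the embedded v4 part of a
                   * ::ffff:a.b.c.d mapped source address */
        /*  22 */ BPF_STMT(BPF_LD|BPF_W|BPF_IND, 0),
                  /* A = the 32-bit v4 address, loaded through X + 0 */
        /* 23..36: one shared set of bogon checks for both address families */

So instructions 22..36 run with X holding a negative, SKF_NET_OFF-relative
offset, which is exactly the case the JITs currently turn into a dropped
packet.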

>
>

Greetings
Jan

-- 
˙qɐɥ ʇɟnɐʞǝƃ ʎɐqǝ ıǝq ɹnʇɐʇsɐʇ ǝuıǝ ɹıɯ ɥɔı sɐp lɐɯ ǝʇzʇǝl sɐp ʇsı sɐp
'ʇɯɯɐpɹǝʌ


* Re: [REGRESSION][PATCH] bpf_jit drops the ball on indirect negative mem references
  2012-03-28 20:26   ` Jan Seiffert
@ 2012-03-28 20:39     ` Eric Dumazet
  2012-03-29 11:54       ` Jan Seiffert
  0 siblings, 1 reply; 14+ messages in thread
From: Eric Dumazet @ 2012-03-28 20:39 UTC (permalink / raw)
  To: Jan Seiffert; +Cc: netdev, linux-kernel, David S. Miller, Matt Evans

On Wed, 2012-03-28 at 22:26 +0200, Jan Seiffert wrote:
> 2012/3/28 Eric Dumazet <eric.dumazet@gmail.com>:
> > On Wed, 2012-03-28 at 21:15 +0200, Jan Seiffert wrote:
> >> Consider the following test program:
> >>
> >> #include <stdio.h>
> >> #include <sys/types.h>
> >> #include <sys/socket.h>
> >> #include <netinet/in.h>
> >> #include <pcap-bpf.h>
> >>
> >> #define die(x) do {perror(x); return 1;} while (0)
> >> struct bpf_insn udp_filter[] = {
> >>       /*   0 */ BPF_STMT(BPF_LDX|BPF_W|BPF_IMM, -1048576+(0)), /* leax        net[0] */
> >>       /*   1 */ BPF_STMT(BPF_LD|BPF_B|BPF_IND, 0),             /* ldb [x+0] */
> >>       /*   2 */ BPF_STMT(BPF_RET|BPF_A, 0),                    /* ret a */
> >> };
> >
> > When this point was raised some weeks ago, we wanted to see a _real_ use
> > of negative mem reference.
> >
> > You provide a test program but what this filter is supposed to do
> > exactly ?
> >
> 
> Say you have a UDP socket, and you want to filter for bogus source
> addresses (drop already in kernel to save the context switch).
> To have only one bpf program for ipv4 and ipv6 (you have to checked
> the same bogus v4 addresses in mapped space), there is a point where
> it elegant to have a negative offset saved in the X register.

Cool, that's a valid use, thanks.


The problem is that you slow down the JIT in its normal use for a very
specific use case.

Please rework your patch so that absolute loads of positive offsets
(known at compile time) don't have to test for negative offsets at run time.

You add two instructions per load, and that's not good.

Something like:

sk_load_word:
        .globl  sk_load_word
 
       test    %esi,%esi
       js      bpf_slow_path_word_neg

sk_load_word_positive_offset:
	.globl sk_load_word_positive_offset

        mov     %r9d,%eax               # hlen
        sub     %esi,%eax               # hlen - offset
        cmp     $3,%eax

...




* Re: [REGRESSION][PATCH] bpf_jit drops the ball on indirect negative mem references
  2012-03-28 19:15 [REGRESSION][PATCH] bpf_jit drops the ball on indirect negative mem references Jan Seiffert
  2012-03-28 20:05 ` Eric Dumazet
@ 2012-03-28 20:58 ` David Miller
  1 sibling, 0 replies; 14+ messages in thread
From: David Miller @ 2012-03-28 20:58 UTC (permalink / raw)
  To: kaffeemonster; +Cc: netdev, linux-kernel, matt, eric.dumazet


Please do not post patches as octet-stream attachments; post them
inline so that:

1) People can more easily review and quote the code in replies.

2) It actually gets logged properly in patchwork.

Otherwise I can guarantee your patch will get less attention than
you would like.

Thanks.


* Re: [REGRESSION][PATCH] bpf_jit drops the ball on indirect negative mem references
  2012-03-28 20:39     ` Eric Dumazet
@ 2012-03-29 11:54       ` Jan Seiffert
  2012-03-29 13:57         ` Eric Dumazet
  2012-03-30  9:11         ` Eric Dumazet
  0 siblings, 2 replies; 14+ messages in thread
From: Jan Seiffert @ 2012-03-29 11:54 UTC (permalink / raw)
  To: netdev; +Cc: Eric Dumazet, linux-kernel, David S. Miller, Matt Evans

Eric Dumazet wrote:
> On Wed, 2012-03-28 at 22:26 +0200, Jan Seiffert wrote:
> [snip]
>> Say you have a UDP socket, and you want to filter for bogus source
>> addresses (drop already in kernel to save the context switch).
>> To have only one bpf program for ipv4 and ipv6 (you have to checked
>> the same bogus v4 addresses in mapped space), there is a point where
>> it elegant to have a negative offset saved in the X register.
> Cool, thats a valid use, thanks.
>
>
> Problem is you slow down the jit in its normal use, for a very specific
> use. 
>
> Please rework your patch so that absolute loads of positive offsets
> (known at compile time) dont have to test negative offsets at run time.
>
> You add two instructions per load, and thats not good.
>
> Something like :
>
> sk_load_word:
>         .globl  sk_load_word
>  
>        test    %esi,%esi
>        js      bpf_slow_path_word_neg
>
> sk_load_word_positive_offset:
> 	.globl sk_load_word_positive_offset
>
>         mov     %r9d,%eax               # hlen
>         sub     %esi,%eax               # hlen - offset
>         cmp     $3,%eax
>
> ...
>
>
>
OK, to keep the ball rolling, here is a v2 with the changes you suggested:

Consider the following test program:

#include <stdio.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <pcap-bpf.h>

#define die(x) do {perror(x); return 1;} while (0)
struct bpf_insn udp_filter[] = {
	/*   0 */ BPF_STMT(BPF_LDX|BPF_W|BPF_IMM, -1048576+(0)), /* leax	net[0] */
	/*   1 */ BPF_STMT(BPF_LD|BPF_B|BPF_IND, 0),             /* ldb	[x+0] */
	/*   2 */ BPF_STMT(BPF_RET|BPF_A, 0),                    /* ret	a */
};

int main(int argc, char *argv[])
{
	char buf[512];
	struct sockaddr_in addr;
	struct bpf_program prg;
	socklen_t addr_s;
	ssize_t res;
	int fd;

	addr.sin_family = AF_INET;
	addr.sin_port = htons(5000);
	addr.sin_addr.s_addr = 0;
	addr_s = sizeof(addr);
	prg.bf_len = sizeof(udp_filter)/sizeof(udp_filter[0]);
	prg.bf_insns = udp_filter;
	if(-1 == (fd = socket(AF_INET, SOCK_DGRAM, 0)))
		die("socket");
	if(-1 == bind(fd, (struct sockaddr *)&addr, sizeof(addr)))
		die("bind");
	if(-1 == setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &prg, sizeof(prg)))
		die("setsockopt");
	res = recvfrom(fd, buf, sizeof(buf), 0, (struct sockaddr *)&addr, &addr_s);
	if(res != -1)
		printf("packet received: %zi bytes\n", res);
	else
		die("recvfrom");
	return 0;
}

When run with the BPF JIT disabled, it works:
console 1 $ ./bpf
console 2 $ echo "hello" | nc -u localhost 5000
console 1: packet received: 6 bytes

When the BPF JIT gets enabled (echo 100 >
/proc/sys/net/core/bpf_jit_enable), the same program stops working:
console 1 $ ./bpf
console 2 $ echo "hello" | nc -u localhost 5000
console 1:

The reason is that neither JIT (x86 nor powerpc) handles negative
memory references such as SKF_NET_OFF or SKF_LL_OFF; only the simple
ancillary data references are supported (by mapping them to special
instructions).
For an absolute load the JIT aborts the translation when it sees a
negative reference, and a negative k on an indirect load also aborts
the translation. But if X is negative to begin with, only the error
handler is reached at runtime, which drops the whole packet.

I propose the following patch to fix this situation.
Lightly tested on x86, but the powerpc asm part is probably wrong.


Signed-off-by: Jan Seiffert <kaffeemonster@googlemail.com>

diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h
index af1ab5e..e9b57b3 100644
--- a/arch/powerpc/net/bpf_jit.h
+++ b/arch/powerpc/net/bpf_jit.h
@@ -49,6 +49,10 @@
  * Assembly helpers from arch/powerpc/net/bpf_jit.S:
  */
 extern u8 sk_load_word[], sk_load_half[], sk_load_byte[], sk_load_byte_msh[];
+extern u8 sk_load_word_positive_offset[], sk_load_half_positive_offset[];
+extern u8 sk_load_byte_positive_offset[], sk_load_byte_msh_positive_offset[];
+extern u8 sk_load_word_negative_offset[], sk_load_half_negative_offset[];
+extern u8 sk_load_byte_negative_offset[], sk_load_byte_msh_negative_offset[];
 
 #define FUNCTION_DESCR_SIZE	24
 
diff --git a/arch/powerpc/net/bpf_jit_64.S b/arch/powerpc/net/bpf_jit_64.S
index ff4506e..e590aa5 100644
--- a/arch/powerpc/net/bpf_jit_64.S
+++ b/arch/powerpc/net/bpf_jit_64.S
@@ -31,14 +31,13 @@
  * then branch directly to slow_path_XXX if required.  (In fact, could
  * load a spare GPR with the address of slow_path_generic and pass size
  * as an argument, making the call site a mtlr, li and bllr.)
- *
- * Technically, the "is addr < 0" check is unnecessary & slowing down
- * the ABS path, as it's statically checked on generation.
  */
 	.globl	sk_load_word
 sk_load_word:
 	cmpdi	r_addr, 0
-	blt	bpf_error
+	blt	bpf_slow_path_word_neg
+	.globl	sk_load_word_positive_offset
+sk_load_word_positive_offset:
 	/* Are we accessing past headlen? */
 	subi	r_scratch1, r_HL, 4
 	cmpd	r_scratch1, r_addr
@@ -51,7 +50,9 @@ sk_load_word:
 	.globl	sk_load_half
 sk_load_half:
 	cmpdi	r_addr, 0
-	blt	bpf_error
+	blt	bpf_slow_path_half_neg
+	.globl	sk_load_half_positive_offset
+sk_load_half_positive_offset:
 	subi	r_scratch1, r_HL, 2
 	cmpd	r_scratch1, r_addr
 	blt	bpf_slow_path_half
@@ -61,7 +62,9 @@ sk_load_half:
 	.globl	sk_load_byte
 sk_load_byte:
 	cmpdi	r_addr, 0
-	blt	bpf_error
+	blt	bpf_slow_path_byte_neg
+	.globl	sk_load_byte_positive_offset
+sk_load_byte_positive_offset:
 	cmpd	r_HL, r_addr
 	ble	bpf_slow_path_byte
 	lbzx	r_A, r_D, r_addr
@@ -69,22 +72,20 @@ sk_load_byte:
 
 /*
  * BPF_S_LDX_B_MSH: ldxb  4*([offset]&0xf)
- * r_addr is the offset value, already known positive
+ * r_addr is the offset value
  */
 	.globl sk_load_byte_msh
 sk_load_byte_msh:
+	cmpdi	r_addr, 0
+	blt	bpf_slow_path_byte_msh_neg
+	.globl sk_load_byte_msh_positive_offset
+sk_load_byte_msh_positive_offset:
 	cmpd	r_HL, r_addr
 	ble	bpf_slow_path_byte_msh
 	lbzx	r_X, r_D, r_addr
 	rlwinm	r_X, r_X, 2, 32-4-2, 31-2
 	blr
 
-bpf_error:
-	/* Entered with cr0 = lt */
-	li	r3, 0
-	/* Generated code will 'blt epilogue', returning 0. */
-	blr
-
 /* Call out to skb_copy_bits:
  * We'll need to back up our volatile regs first; we have
  * local variable space at r1+(BPF_PPC_STACK_BASIC).
@@ -136,3 +137,85 @@ bpf_slow_path_byte_msh:
 	lbz	r_X, BPF_PPC_STACK_BASIC+(2*8)(r1)
 	rlwinm	r_X, r_X, 2, 32-4-2, 31-2
 	blr
+
+/* Call out to bpf_internal_load_pointer_neg_helper:
+ * We'll need to back up our volatile regs first; we have
+ * local variable space at r1+(BPF_PPC_STACK_BASIC).
+ * Allocate a new stack frame here to remain ABI-compliant in
+ * stashing LR.
+ */
+#define sk_negative_common(SIZE)				\
+	mflr	r0;						\
+	std	r0, 16(r1);					\
+	/* R3 goes in parameter space of caller's frame */	\
+	std	r_skb, (BPF_PPC_STACKFRAME+48)(r1);		\
+	std	r_A, (BPF_PPC_STACK_BASIC+(0*8))(r1);		\
+	std	r_X, (BPF_PPC_STACK_BASIC+(1*8))(r1);		\
+	stdu	r1, -BPF_PPC_SLOWPATH_FRAME(r1);		\
+	/* R3 = r_skb, as passed */				\
+	mr	r4, r_addr;					\
+	li	r5, SIZE;					\
+	bl	bpf_internal_load_pointer_neg_helper;		\
+	/* R3 != 0 on success */				\
+	addi	r1, r1, BPF_PPC_SLOWPATH_FRAME;			\
+	ld	r0, 16(r1);					\
+	ld	r_A, (BPF_PPC_STACK_BASIC+(0*8))(r1);		\
+	ld	r_X, (BPF_PPC_STACK_BASIC+(1*8))(r1);		\
+	mtlr	r0;						\
+	cmpldi	r3, 0;						\
+	beq	bpf_error_slow;	/* cr0 = EQ */			\
+	mr	r_addr, r3;					\
+	ld	r_skb, (BPF_PPC_STACKFRAME+48)(r1);		\
+	/* Great success! */
+
+bpf_slow_path_word_neg:
+	lis     r_scratch1,-32	/* SKF_LL_OFF */
+	cmpd	r_addr, r_scratch1	/* addr < SKF_* */
+	blt	bpf_error	/* cr0 = LT */
+	.globl	sk_load_word_negative_offset
+sk_load_word_negative_offset:
+	sk_negative_common(4)
+	lwz	r_A, 0(r_addr)
+	blr
+
+bpf_slow_path_half_neg:
+	lis     r_scratch1,-32	/* SKF_LL_OFF */
+	cmpd	r_addr, r_scratch1	/* addr < SKF_* */
+	blt	bpf_error	/* cr0 = LT */
+	.globl	sk_load_half_negative_offset
+sk_load_half_negative_offset:
+	sk_negative_common(2)
+	lhz	r_A, 0(r_addr)
+	blr
+
+bpf_slow_path_byte_neg:
+	lis     r_scratch1,-32	/* SKF_LL_OFF */
+	cmpd	r_addr, r_scratch1	/* addr < SKF_* */
+	blt	bpf_error	/* cr0 = LT */
+	.globl	sk_load_byte_negative_offset
+sk_load_byte_negative_offset:
+	sk_negative_common(1)
+	lbz	r_A, 0(r_addr)
+	blr
+
+bpf_slow_path_byte_msh_neg:
+	lis     r_scratch1,-32	/* SKF_LL_OFF */
+	cmpd	r_addr, r_scratch1	/* addr < SKF_* */
+	blt	bpf_error	/* cr0 = LT */
+	.globl	sk_load_byte_msh_negative_offset
+sk_load_byte_msh_negative_offset:
+	sk_negative_common(1)
+	lbz	r_X, 0(r_addr)
+	rlwinm	r_X, r_X, 2, 32-4-2, 31-2
+	blr
+
+bpf_error_slow:
+	/* fabricate a cr0 = lt */
+	li	r_scratch1, -1
+	cmpdi	r_scratch1, 0
+bpf_error:
+	/* Entered with cr0 = lt */
+	li	r3, 0
+	/* Generated code will 'blt epilogue', returning 0. */
+	blr
+
diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
index 73619d3..2dc8b14 100644
--- a/arch/powerpc/net/bpf_jit_comp.c
+++ b/arch/powerpc/net/bpf_jit_comp.c
@@ -127,6 +127,9 @@ static void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx)
 	PPC_BLR();
 }
 
+#define CHOOSE_LOAD_FUNC(K, func) \
+	((int)K < 0 ? ((int)K >= SKF_LL_OFF ? func##_negative_offset : func) : func##_positive_offset)
+
 /* Assemble the body code between the prologue & epilogue. */
 static int bpf_jit_build_body(struct sk_filter *fp, u32 *image,
 			      struct codegen_context *ctx,
@@ -391,21 +394,16 @@ static int bpf_jit_build_body(struct sk_filter *fp, u32 *image,
 
 			/*** Absolute loads from packet header/data ***/
 		case BPF_S_LD_W_ABS:
-			func = sk_load_word;
+			func = CHOOSE_LOAD_FUNC(K, sk_load_word);
 			goto common_load;
 		case BPF_S_LD_H_ABS:
-			func = sk_load_half;
+			func = CHOOSE_LOAD_FUNC(K, sk_load_half);
 			goto common_load;
 		case BPF_S_LD_B_ABS:
-			func = sk_load_byte;
+			func = CHOOSE_LOAD_FUNC(K, sk_load_byte);
 		common_load:
-			/*
-			 * Load from [K].  Reference with the (negative)
-			 * SKF_NET_OFF/SKF_LL_OFF offsets is unsupported.
-			 */
+			/* Load from [K]. */
 			ctx->seen |= SEEN_DATAREF;
-			if ((int)K < 0)
-				return -ENOTSUPP;
 			PPC_LI64(r_scratch1, func);
 			PPC_MTLR(r_scratch1);
 			PPC_LI32(r_addr, K);
@@ -429,7 +427,7 @@ static int bpf_jit_build_body(struct sk_filter *fp, u32 *image,
 		common_load_ind:
 			/*
 			 * Load from [X + K].  Negative offsets are tested for
-			 * in the helper functions, and result in a 'ret 0'.
+			 * in the helper functions.
 			 */
 			ctx->seen |= SEEN_DATAREF | SEEN_XREG;
 			PPC_LI64(r_scratch1, func);
@@ -443,13 +441,7 @@ static int bpf_jit_build_body(struct sk_filter *fp, u32 *image,
 			break;
 
 		case BPF_S_LDX_B_MSH:
-			/*
-			 * x86 version drops packet (RET 0) when K<0, whereas
-			 * interpreter does allow K<0 (__load_pointer, special
-			 * ancillary data).  common_load returns ENOTSUPP if K<0,
-			 * so we fall back to interpreter & filter works.
-			 */
-			func = sk_load_byte_msh;
+			func = CHOOSE_LOAD_FUNC(K, sk_load_byte_msh);
 			goto common_load;
 			break;
 
diff --git a/arch/x86/net/bpf_jit.S b/arch/x86/net/bpf_jit.S
index 6687022..63ae130 100644
--- a/arch/x86/net/bpf_jit.S
+++ b/arch/x86/net/bpf_jit.S
@@ -18,17 +18,18 @@
  * r9d : hlen = skb->len - skb->data_len
  */
 #define SKBDATA	%r8
+#define SKF_MAX_NEG_OFF    $(-0x200000) /* SKF_LL_OFF from filter.h */
 
-sk_load_word_ind:
-	.globl	sk_load_word_ind
-
-	add	%ebx,%esi	/* offset += X */
-#	test    %esi,%esi	/* if (offset < 0) goto bpf_error; */
-	js	bpf_error
-
+	.p2align 1
 sk_load_word:
 	.globl	sk_load_word
 
+	test	%esi,%esi
+	js	bpf_slow_path_word_neg
+
+sk_load_word_positive_offset:
+	.globl	sk_load_word_positive_offset
+
 	mov	%r9d,%eax		# hlen
 	sub	%esi,%eax		# hlen - offset
 	cmp	$3,%eax
@@ -37,16 +38,16 @@ sk_load_word:
 	bswap   %eax  			/* ntohl() */
 	ret
 
-
-sk_load_half_ind:
-	.globl sk_load_half_ind
-
-	add	%ebx,%esi	/* offset += X */
-	js	bpf_error
-
+	.p2align 1
 sk_load_half:
 	.globl	sk_load_half
 
+	test	%esi,%esi
+	js	bpf_slow_path_half_neg
+
+sk_load_half_positive_offset:
+	.globl	sk_load_half_positive_offset
+
 	mov	%r9d,%eax
 	sub	%esi,%eax		#	hlen - offset
 	cmp	$1,%eax
@@ -55,14 +56,16 @@ sk_load_half:
 	rol	$8,%ax			# ntohs()
 	ret
 
-sk_load_byte_ind:
-	.globl sk_load_byte_ind
-	add	%ebx,%esi	/* offset += X */
-	js	bpf_error
-
+	.p2align 1
 sk_load_byte:
 	.globl	sk_load_byte
 
+	test	%esi,%esi
+	js	bpf_slow_path_byte_neg
+
+sk_load_byte_positive_offset:
+	.globl	sk_load_byte_positive_offset
+
 	cmp	%esi,%r9d   /* if (offset >= hlen) goto bpf_slow_path_byte */
 	jle	bpf_slow_path_byte
 	movzbl	(SKBDATA,%rsi),%eax
@@ -73,25 +76,22 @@ sk_load_byte:
  *
  * Implements BPF_S_LDX_B_MSH : ldxb  4*([offset]&0xf)
  * Must preserve A accumulator (%eax)
- * Inputs : %esi is the offset value, already known positive
+ * Inputs : %esi is the offset value
  */
-ENTRY(sk_load_byte_msh)
-	CFI_STARTPROC
+	.p2align 1
+sk_load_byte_msh:
+	.globl	sk_load_byte_msh
+	test	%esi,%esi
+	js	bpf_slow_path_byte_msh_neg
+
+sk_load_byte_msh_positive_offset:
+	.globl	sk_load_byte_msh_positive_offset
 	cmp	%esi,%r9d      /* if (offset >= hlen) goto bpf_slow_path_byte_msh */
 	jle	bpf_slow_path_byte_msh
 	movzbl	(SKBDATA,%rsi),%ebx
 	and	$15,%bl
 	shl	$2,%bl
 	ret
-	CFI_ENDPROC
-ENDPROC(sk_load_byte_msh)
-
-bpf_error:
-# force a return 0 from jit handler
-	xor		%eax,%eax
-	mov		-8(%rbp),%rbx
-	leaveq
-	ret
 
 /* rsi contains offset and can be scratched */
 #define bpf_slow_path_common(LEN)		\
@@ -108,6 +108,7 @@ bpf_error:
 	pop	%rdi
 
 
+	.p2align 1
 bpf_slow_path_word:
 	bpf_slow_path_common(4)
 	js	bpf_error
@@ -115,6 +116,7 @@ bpf_slow_path_word:
 	bswap	%eax
 	ret
 
+	.p2align 1
 bpf_slow_path_half:
 	bpf_slow_path_common(2)
 	js	bpf_error
@@ -123,12 +125,14 @@ bpf_slow_path_half:
 	movzwl	%ax,%eax
 	ret
 
+	.p2align 1
 bpf_slow_path_byte:
 	bpf_slow_path_common(1)
 	js	bpf_error
 	movzbl	-12(%rbp),%eax
 	ret
 
+	.p2align 1
 bpf_slow_path_byte_msh:
 	xchg	%eax,%ebx /* dont lose A , X is about to be scratched */
 	bpf_slow_path_common(1)
@@ -138,3 +142,73 @@ bpf_slow_path_byte_msh:
 	shl	$2,%al
 	xchg	%eax,%ebx
 	ret
+
+#define sk_negative_common(SIZE)				\
+	push	%rdi;	/* save skb */				\
+	push	%r9;						\
+	push	SKBDATA;					\
+/* rsi already has offset */					\
+	mov	$SIZE,%ecx;	/* size */			\
+	call	bpf_internal_load_pointer_neg_helper;		\
+	test	%rax,%rax;					\
+	pop	SKBDATA;					\
+	pop	%r9;						\
+	pop	%rdi;						\
+	jz	bpf_error
+
+
+	.p2align 1
+bpf_slow_path_word_neg:
+	cmp	SKF_MAX_NEG_OFF, %esi	/* test range */
+	jl	bpf_error	/* offset lower -> error  */
+sk_load_word_negative_offset:
+	.globl	sk_load_word_negative_offset
+	sk_negative_common(4)
+	mov	(%rax), %eax
+	bswap	%eax
+	ret
+
+	.p2align 1
+bpf_slow_path_half_neg:
+	cmp	SKF_MAX_NEG_OFF, %esi
+	jl	bpf_error
+sk_load_half_negative_offset:
+	.globl	sk_load_half_negative_offset
+	sk_negative_common(2)
+	mov	(%rax),%ax
+	rol	$8,%ax
+	movzwl	%ax,%eax
+	ret
+
+	.p2align 1
+bpf_slow_path_byte_neg:
+	cmp	SKF_MAX_NEG_OFF, %esi
+	jl	bpf_error
+sk_load_byte_negative_offset:
+	.globl	sk_load_byte_negative_offset
+	sk_negative_common(1)
+	movzbl	(%rax), %eax
+	ret
+
+	.p2align 1
+bpf_slow_path_byte_msh_neg:
+	cmp	SKF_MAX_NEG_OFF, %esi
+	jl	bpf_error
+sk_load_byte_msh_negative_offset:
+	.globl	sk_load_byte_msh_negative_offset
+	xchg	%eax,%ebx /* dont lose A , X is about to be scratched */
+	sk_negative_common(1)
+	movzbl	(%rax),%eax
+	and	$15,%al
+	shl	$2,%al
+	xchg	%eax,%ebx
+	ret
+
+	.p2align 1
+bpf_error:
+# force a return 0 from jit handler
+	xor		%eax,%eax
+	mov		-8(%rbp),%rbx
+	leaveq
+	ret
+
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 5671752..c20374f 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -30,7 +30,10 @@ int bpf_jit_enable __read_mostly;
  * assembly code in arch/x86/net/bpf_jit.S
  */
 extern u8 sk_load_word[], sk_load_half[], sk_load_byte[], sk_load_byte_msh[];
-extern u8 sk_load_word_ind[], sk_load_half_ind[], sk_load_byte_ind[];
+extern u8 sk_load_word_positive_offset[], sk_load_half_positive_offset[];
+extern u8 sk_load_byte_positive_offset[], sk_load_byte_msh_positive_offset[];
+extern u8 sk_load_word_negative_offset[], sk_load_half_negative_offset[];
+extern u8 sk_load_byte_negative_offset[], sk_load_byte_msh_negative_offset[];
 
 static inline u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len)
 {
@@ -117,6 +120,8 @@ static inline void bpf_flush_icache(void *start, void *end)
 	set_fs(old_fs);
 }
 
+#define CHOOSE_LOAD_FUNC(K, func) \
+	((int)K < 0 ? ((int)K >= SKF_LL_OFF ? func##_negative_offset : func) : func##_positive_offset)
 
 void bpf_jit_compile(struct sk_filter *fp)
 {
@@ -473,44 +478,42 @@ void bpf_jit_compile(struct sk_filter *fp)
 #endif
 				break;
 			case BPF_S_LD_W_ABS:
-				func = sk_load_word;
+				func = CHOOSE_LOAD_FUNC(K, sk_load_word);
 common_load:			seen |= SEEN_DATAREF;
-				if ((int)K < 0) {
-					/* Abort the JIT because __load_pointer() is needed. */
-					goto out;
-				}
 				t_offset = func - (image + addrs[i]);
 				EMIT1_off32(0xbe, K); /* mov imm32,%esi */
 				EMIT1_off32(0xe8, t_offset); /* call */
 				break;
 			case BPF_S_LD_H_ABS:
-				func = sk_load_half;
+				func = CHOOSE_LOAD_FUNC(K, sk_load_half);
 				goto common_load;
 			case BPF_S_LD_B_ABS:
-				func = sk_load_byte;
+				func = CHOOSE_LOAD_FUNC(K, sk_load_byte);
 				goto common_load;
 			case BPF_S_LDX_B_MSH:
-				if ((int)K < 0) {
-					/* Abort the JIT because __load_pointer() is needed. */
-					goto out;
-				}
+				func = CHOOSE_LOAD_FUNC(K, sk_load_byte_msh);
 				seen |= SEEN_DATAREF | SEEN_XREG;
-				t_offset = sk_load_byte_msh - (image + addrs[i]);
+				t_offset = func - (image + addrs[i]);
 				EMIT1_off32(0xbe, K);	/* mov imm32,%esi */
 				EMIT1_off32(0xe8, t_offset); /* call sk_load_byte_msh */
 				break;
 			case BPF_S_LD_W_IND:
-				func = sk_load_word_ind;
+				func = sk_load_word;
 common_load_ind:		seen |= SEEN_DATAREF | SEEN_XREG;
 				t_offset = func - (image + addrs[i]);
-				EMIT1_off32(0xbe, K);	/* mov imm32,%esi   */
+				if (K) {
+					EMIT2(0x8d, 0xb3); /* lea imm32(%rbx),%esi */
+					EMIT(K, 4);
+				} else {
+					EMIT2(0x89,0xde); /* mov %ebx,%esi */
+				}
 				EMIT1_off32(0xe8, t_offset);	/* call sk_load_xxx_ind */
 				break;
 			case BPF_S_LD_H_IND:
-				func = sk_load_half_ind;
+				func = sk_load_half;
 				goto common_load_ind;
 			case BPF_S_LD_B_IND:
-				func = sk_load_byte_ind;
+				func = sk_load_byte;
 				goto common_load_ind;
 			case BPF_S_JMP_JA:
 				t_offset = addrs[i + K] - addrs[i];
diff --git a/net/core/filter.c b/net/core/filter.c
index 5dea452..04ca613 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -41,7 +41,7 @@
 #include <linux/ratelimit.h>
 
 /* No hurry in this branch */
-static void *__load_pointer(const struct sk_buff *skb, int k, unsigned int size)
+void *bpf_internal_load_pointer_neg_helper(const struct sk_buff *skb, int k, unsigned int size)
 {
 	u8 *ptr = NULL;
 
@@ -54,13 +54,14 @@ static void *__load_pointer(const struct sk_buff *skb, int k, unsigned int size)
 		return ptr;
 	return NULL;
 }
+EXPORT_SYMBOL(bpf_internal_load_pointer_neg_helper);
 
 static inline void *load_pointer(const struct sk_buff *skb, int k,
 				 unsigned int size, void *buffer)
 {
 	if (k >= 0)
 		return skb_header_pointer(skb, k, size, buffer);
-	return __load_pointer(skb, k, size);
+	return bpf_internal_load_pointer_neg_helper(skb, k, size);
 }
 
 /**



* Re: [REGRESSION][PATCH] bpf_jit drops the ball on indirect negative mem references
  2012-03-29 11:54       ` Jan Seiffert
@ 2012-03-29 13:57         ` Eric Dumazet
  2012-03-30  9:11         ` Eric Dumazet
  1 sibling, 0 replies; 14+ messages in thread
From: Eric Dumazet @ 2012-03-29 13:57 UTC (permalink / raw)
  To: Jan Seiffert; +Cc: netdev, linux-kernel, David S. Miller, Matt Evans

On Thu, 2012-03-29 at 13:54 +0200, Jan Seiffert wrote:

> Ok, to keep the ball rolling here is a V2 with the changes you suggested:
> 
...

> I propose the following patch to fix this situation.
> Lightly tested on x86, but the powerpc asm part is prop. wrong.
> 
> 
> Signed-of-by: Jan Seiffert <kaffeemonster@googlemail.com>
> 

The x86 part seems very good at first glance; I'll test it and give my
feedback soon.

Thanks !




* Re: [REGRESSION][PATCH] bpf_jit drops the ball on indirect negative mem references
  2012-03-29 11:54       ` Jan Seiffert
  2012-03-29 13:57         ` Eric Dumazet
@ 2012-03-30  9:11         ` Eric Dumazet
  2012-03-30 13:42           ` Jan Seiffert
  1 sibling, 1 reply; 14+ messages in thread
From: Eric Dumazet @ 2012-03-30  9:11 UTC (permalink / raw)
  To: Jan Seiffert; +Cc: netdev, linux-kernel, David S. Miller, Matt Evans

On Thu, 2012-03-29 at 13:54 +0200, Jan Seiffert wrote:

> 
> +	.p2align 1
>  bpf_slow_path_word:
>  	bpf_slow_path_common(4)
>  	js	bpf_error
> @@ -115,6 +116,7 @@ bpf_slow_path_word:
>  	bswap	%eax
>  	ret
>  
> +	.p2align 1
>  bpf_slow_path_half:
>  	bpf_slow_path_common(2)
>  	js	bpf_error
> @@ -123,12 +125,14 @@ bpf_slow_path_half:
>  	movzwl	%ax,%eax
>  	ret

All these ".p2align 1" are noise for this patch.

This should be done as a separate patch, explaining the rationale.

...

>  			case BPF_S_LD_W_IND:
> -				func = sk_load_word_ind;
> +				func = sk_load_word;
>  common_load_ind:		seen |= SEEN_DATAREF | SEEN_XREG;
>  				t_offset = func - (image + addrs[i]);
> -				EMIT1_off32(0xbe, K);	/* mov imm32,%esi   */
> +				if (K) {
> +					EMIT2(0x8d, 0xb3); /* lea imm32(%rbx),%esi */
> +					EMIT(K, 4);
> +				} else {
> +					EMIT2(0x89,0xde); /* mov %ebx,%esi */
> +				}
>  				EMIT1_off32(0xe8, t_offset);	/* call sk_load_xxx_ind */
>  				break;


Please add the code for imm8 offsets as well?

if (is_imm8(K))
	EMIT3(0x8d, 0x73, K); /* lea imm8(%rbx),%esi */
else
	EMIT2_off32(0x8d, 0xb3, K); /* lea imm32(%rbx),%esi */

Thanks





* Re: [REGRESSION][PATCH] bpf_jit drops the ball on indirect negative mem references
  2012-03-30  9:11         ` Eric Dumazet
@ 2012-03-30 13:42           ` Jan Seiffert
  2012-03-30 14:08             ` Eric Dumazet
  2012-03-30 15:19             ` Matt Evans
  0 siblings, 2 replies; 14+ messages in thread
From: Jan Seiffert @ 2012-03-30 13:42 UTC (permalink / raw)
  To: netdev; +Cc: Eric Dumazet, linux-kernel, David S. Miller, Matt Evans

Eric Dumazet wrote:
> [snip]
> All these ".p2align 1" are noise for this patch.
>
> This should be done as separate patch, explaining the rationale.
>
> ...
OK, I thought I'd do it since you were concerned about performance and
I'm touching this stuff anyway.
But you are right, it can be done separately.
So it's gone.

> [snip]
>
> Please add the code for imm8 offsets as well ?
>
> if (is_imm8(K))
> 	EMIT3(0x8d, 0x73, K); /* lea imm8(%rbx),%esi */
> else
> 	EMIT2_off32(0x8d, 0xb3, K); /* lea imm32(%rbx),%esi */

Right, there is this imm8 form. I left it out because I never saw gas emit it
and totally forgot about it.
Your wish is my command. But since there is no EMIT2_off32, and introducing
it would mean additional cleanup noise, I stayed with two emits.
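
For illustration, the two-emit variant I mean looks roughly like this in
bpf_jit_comp.c (byte values as in your suggestion, just without EMIT2_off32;
a sketch, not the final hunk):

				if (is_imm8(K)) {
					/* lea imm8(%rbx),%esi */
					EMIT3(0x8d, 0x73, K);
				} else {
					/* lea imm32(%rbx),%esi */
					EMIT2(0x8d, 0xb3);
					EMIT(K, 4);
				}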

Do you know where I can ping the powerpc guys a little bit harder?

So here is a v3 of the patch:

Consider the following test program:

#include <stdio.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <pcap-bpf.h>

#define die(x) do {perror(x); return 1;} while (0)
struct bpf_insn udp_filter[] = {
	/*   0 */ BPF_STMT(BPF_LDX|BPF_W|BPF_IMM, -1048576+(0)), /* leax	net[0] */
	/*   1 */ BPF_STMT(BPF_LD|BPF_B|BPF_IND, 0),             /* ldb	[x+0] */
	/*   2 */ BPF_STMT(BPF_RET|BPF_A, 0),                    /* ret	a */
};

int main(int argc, char *argv[])
{
	char buf[512];
	struct sockaddr_in addr;
	struct bpf_program prg;
	socklen_t addr_s;
	ssize_t res;
	int fd;

	addr.sin_family = AF_INET;
	addr.sin_port = htons(5000);
	addr.sin_addr.s_addr = 0;
	addr_s = sizeof(addr);
	prg.bf_len = sizeof(udp_filter)/sizeof(udp_filter[0]);
	prg.bf_insns = udp_filter;
	if(-1 == (fd = socket(AF_INET, SOCK_DGRAM, 0)))
		die("socket");
	if(-1 == bind(fd, (struct sockaddr *)&addr, sizeof(addr)))
		die("bind");
	if(-1 == setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &prg, sizeof(prg)))
		die("setsockopt");
	res = recvfrom(fd, buf, sizeof(buf), 0, (struct sockaddr *)&addr, &addr_s);
	if(res != -1)
		printf("packet received: %zi bytes\n", res);
	else
		die("recvfrom");
	return 0;
}

When run with the BPF JIT disabled, it works:
console 1 $ ./bpf
console 2 $ echo "hello" | nc -u localhost 5000
console 1: packet received: 6 bytes

When the BPF JIT gets enabled (echo 100 >
/proc/sys/net/core/bpf_jit_enable), the same program stops working:
console 1 $ ./bpf
console 2 $ echo "hello" | nc -u localhost 5000
console 1:

The reason is that neither JIT (x86 nor powerpc) handles negative
memory references such as SKF_NET_OFF or SKF_LL_OFF; only the simple
ancillary data references are supported (by mapping them to special
instructions).
For an absolute load the JIT aborts the translation when it sees a
negative reference, and a negative k on an indirect load also aborts
the translation. But if X is negative to begin with, only the error
handler is reached at runtime, which drops the whole packet.

I propose the following patch to fix this situation.
Lightly tested on x86, but the powerpc asm part is probably wrong.

Signed-off-by: Jan Seiffert <kaffeemonster@googlemail.com>

diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h
index af1ab5e..e9b57b3 100644
--- a/arch/powerpc/net/bpf_jit.h
+++ b/arch/powerpc/net/bpf_jit.h
@@ -49,6 +49,10 @@
  * Assembly helpers from arch/powerpc/net/bpf_jit.S:
  */
 extern u8 sk_load_word[], sk_load_half[], sk_load_byte[], sk_load_byte_msh[];
+extern u8 sk_load_word_positive_offset[], sk_load_half_positive_offset[];
+extern u8 sk_load_byte_positive_offset[], sk_load_byte_msh_positive_offset[];
+extern u8 sk_load_word_negative_offset[], sk_load_half_negative_offset[];
+extern u8 sk_load_byte_negative_offset[], sk_load_byte_msh_negative_offset[];
 
 #define FUNCTION_DESCR_SIZE	24
 
diff --git a/arch/powerpc/net/bpf_jit_64.S b/arch/powerpc/net/bpf_jit_64.S
index ff4506e..e590aa5 100644
--- a/arch/powerpc/net/bpf_jit_64.S
+++ b/arch/powerpc/net/bpf_jit_64.S
@@ -31,14 +31,13 @@
  * then branch directly to slow_path_XXX if required.  (In fact, could
  * load a spare GPR with the address of slow_path_generic and pass size
  * as an argument, making the call site a mtlr, li and bllr.)
- *
- * Technically, the "is addr < 0" check is unnecessary & slowing down
- * the ABS path, as it's statically checked on generation.
  */
 	.globl	sk_load_word
 sk_load_word:
 	cmpdi	r_addr, 0
-	blt	bpf_error
+	blt	bpf_slow_path_word_neg
+	.globl	sk_load_word_positive_offset
+sk_load_word_positive_offset:
 	/* Are we accessing past headlen? */
 	subi	r_scratch1, r_HL, 4
 	cmpd	r_scratch1, r_addr
@@ -51,7 +50,9 @@ sk_load_word:
 	.globl	sk_load_half
 sk_load_half:
 	cmpdi	r_addr, 0
-	blt	bpf_error
+	blt	bpf_slow_path_half_neg
+	.globl	sk_load_half_positive_offset
+sk_load_half_positive_offset:
 	subi	r_scratch1, r_HL, 2
 	cmpd	r_scratch1, r_addr
 	blt	bpf_slow_path_half
@@ -61,7 +62,9 @@ sk_load_half:
 	.globl	sk_load_byte
 sk_load_byte:
 	cmpdi	r_addr, 0
-	blt	bpf_error
+	blt	bpf_slow_path_byte_neg
+	.globl	sk_load_byte_positive_offset
+sk_load_byte_positive_offset:
 	cmpd	r_HL, r_addr
 	ble	bpf_slow_path_byte
 	lbzx	r_A, r_D, r_addr
@@ -69,22 +72,20 @@ sk_load_byte:
 
 /*
  * BPF_S_LDX_B_MSH: ldxb  4*([offset]&0xf)
- * r_addr is the offset value, already known positive
+ * r_addr is the offset value
  */
 	.globl sk_load_byte_msh
 sk_load_byte_msh:
+	cmpdi	r_addr, 0
+	blt	bpf_slow_path_byte_msh_neg
+	.globl sk_load_byte_msh_positive_offset
+sk_load_byte_msh_positive_offset:
 	cmpd	r_HL, r_addr
 	ble	bpf_slow_path_byte_msh
 	lbzx	r_X, r_D, r_addr
 	rlwinm	r_X, r_X, 2, 32-4-2, 31-2
 	blr
 
-bpf_error:
-	/* Entered with cr0 = lt */
-	li	r3, 0
-	/* Generated code will 'blt epilogue', returning 0. */
-	blr
-
 /* Call out to skb_copy_bits:
  * We'll need to back up our volatile regs first; we have
  * local variable space at r1+(BPF_PPC_STACK_BASIC).
@@ -136,3 +137,85 @@ bpf_slow_path_byte_msh:
 	lbz	r_X, BPF_PPC_STACK_BASIC+(2*8)(r1)
 	rlwinm	r_X, r_X, 2, 32-4-2, 31-2
 	blr
+
+/* Call out to bpf_internal_load_pointer_neg_helper:
+ * We'll need to back up our volatile regs first; we have
+ * local variable space at r1+(BPF_PPC_STACK_BASIC).
+ * Allocate a new stack frame here to remain ABI-compliant in
+ * stashing LR.
+ */
+#define sk_negative_common(SIZE)				\
+	mflr	r0;						\
+	std	r0, 16(r1);					\
+	/* R3 goes in parameter space of caller's frame */	\
+	std	r_skb, (BPF_PPC_STACKFRAME+48)(r1);		\
+	std	r_A, (BPF_PPC_STACK_BASIC+(0*8))(r1);		\
+	std	r_X, (BPF_PPC_STACK_BASIC+(1*8))(r1);		\
+	stdu	r1, -BPF_PPC_SLOWPATH_FRAME(r1);		\
+	/* R3 = r_skb, as passed */				\
+	mr	r4, r_addr;					\
+	li	r5, SIZE;					\
+	bl	bpf_internal_load_pointer_neg_helper;		\
+	/* R3 != 0 on success */				\
+	addi	r1, r1, BPF_PPC_SLOWPATH_FRAME;			\
+	ld	r0, 16(r1);					\
+	ld	r_A, (BPF_PPC_STACK_BASIC+(0*8))(r1);		\
+	ld	r_X, (BPF_PPC_STACK_BASIC+(1*8))(r1);		\
+	mtlr	r0;						\
+	cmpldi	r3, 0;						\
+	beq	bpf_error_slow;	/* cr0 = EQ */			\
+	mr	r_addr, r3;					\
+	ld	r_skb, (BPF_PPC_STACKFRAME+48)(r1);		\
+	/* Great success! */
+
+bpf_slow_path_word_neg:
+	lis     r_scratch1,-32	/* SKF_LL_OFF */
+	cmpd	r_addr, r_scratch1	/* addr < SKF_* */
+	blt	bpf_error	/* cr0 = LT */
+	.globl	sk_load_word_negative_offset
+sk_load_word_negative_offset:
+	sk_negative_common(4)
+	lwz	r_A, 0(r_addr)
+	blr
+
+bpf_slow_path_half_neg:
+	lis     r_scratch1,-32	/* SKF_LL_OFF */
+	cmpd	r_addr, r_scratch1	/* addr < SKF_* */
+	blt	bpf_error	/* cr0 = LT */
+	.globl	sk_load_half_negative_offset
+sk_load_half_negative_offset:
+	sk_negative_common(2)
+	lhz	r_A, 0(r_addr)
+	blr
+
+bpf_slow_path_byte_neg:
+	lis     r_scratch1,-32	/* SKF_LL_OFF */
+	cmpd	r_addr, r_scratch1	/* addr < SKF_* */
+	blt	bpf_error	/* cr0 = LT */
+	.globl	sk_load_byte_negative_offset
+sk_load_byte_negative_offset:
+	sk_negative_common(1)
+	lbz	r_A, 0(r_addr)
+	blr
+
+bpf_slow_path_byte_msh_neg:
+	lis     r_scratch1,-32	/* SKF_LL_OFF */
+	cmpd	r_addr, r_scratch1	/* addr < SKF_* */
+	blt	bpf_error	/* cr0 = LT */
+	.globl	sk_load_byte_msh_negative_offset
+sk_load_byte_msh_negative_offset:
+	sk_negative_common(1)
+	lbz	r_X, 0(r_addr)
+	rlwinm	r_X, r_X, 2, 32-4-2, 31-2
+	blr
+
+bpf_error_slow:
+	/* fabricate a cr0 = lt */
+	li	r_scratch1, -1
+	cmpdi	r_scratch1, 0
+bpf_error:
+	/* Entered with cr0 = lt */
+	li	r3, 0
+	/* Generated code will 'blt epilogue', returning 0. */
+	blr
+
diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
index 73619d3..2dc8b14 100644
--- a/arch/powerpc/net/bpf_jit_comp.c
+++ b/arch/powerpc/net/bpf_jit_comp.c
@@ -127,6 +127,9 @@ static void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx)
 	PPC_BLR();
 }
 
+#define CHOOSE_LOAD_FUNC(K, func) \
+	((int)K < 0 ? ((int)K >= SKF_LL_OFF ? func##_negative_offset : func) : func##_positive_offset)
+
 /* Assemble the body code between the prologue & epilogue. */
 static int bpf_jit_build_body(struct sk_filter *fp, u32 *image,
 			      struct codegen_context *ctx,
@@ -391,21 +394,16 @@ static int bpf_jit_build_body(struct sk_filter *fp, u32 *image,
 
 			/*** Absolute loads from packet header/data ***/
 		case BPF_S_LD_W_ABS:
-			func = sk_load_word;
+			func = CHOOSE_LOAD_FUNC(K, sk_load_word);
 			goto common_load;
 		case BPF_S_LD_H_ABS:
-			func = sk_load_half;
+			func = CHOOSE_LOAD_FUNC(K, sk_load_half);
 			goto common_load;
 		case BPF_S_LD_B_ABS:
-			func = sk_load_byte;
+			func = CHOOSE_LOAD_FUNC(K, sk_load_byte);
 		common_load:
-			/*
-			 * Load from [K].  Reference with the (negative)
-			 * SKF_NET_OFF/SKF_LL_OFF offsets is unsupported.
-			 */
+			/* Load from [K]. */
 			ctx->seen |= SEEN_DATAREF;
-			if ((int)K < 0)
-				return -ENOTSUPP;
 			PPC_LI64(r_scratch1, func);
 			PPC_MTLR(r_scratch1);
 			PPC_LI32(r_addr, K);
@@ -429,7 +427,7 @@ static int bpf_jit_build_body(struct sk_filter *fp, u32 *image,
 		common_load_ind:
 			/*
 			 * Load from [X + K].  Negative offsets are tested for
-			 * in the helper functions, and result in a 'ret 0'.
+			 * in the helper functions.
 			 */
 			ctx->seen |= SEEN_DATAREF | SEEN_XREG;
 			PPC_LI64(r_scratch1, func);
@@ -443,13 +441,7 @@ static int bpf_jit_build_body(struct sk_filter *fp, u32 *image,
 			break;
 
 		case BPF_S_LDX_B_MSH:
-			/*
-			 * x86 version drops packet (RET 0) when K<0, whereas
-			 * interpreter does allow K<0 (__load_pointer, special
-			 * ancillary data).  common_load returns ENOTSUPP if K<0,
-			 * so we fall back to interpreter & filter works.
-			 */
-			func = sk_load_byte_msh;
+			func = CHOOSE_LOAD_FUNC(K, sk_load_byte_msh);
 			goto common_load;
 			break;
 
diff --git a/arch/x86/net/bpf_jit.S b/arch/x86/net/bpf_jit.S
index 6687022..2897d7f 100644
--- a/arch/x86/net/bpf_jit.S
+++ b/arch/x86/net/bpf_jit.S
@@ -18,17 +18,17 @@
  * r9d : hlen = skb->len - skb->data_len
  */
 #define SKBDATA	%r8
-
-sk_load_word_ind:
-	.globl	sk_load_word_ind
-
-	add	%ebx,%esi	/* offset += X */
-#	test    %esi,%esi	/* if (offset < 0) goto bpf_error; */
-	js	bpf_error
+#define SKF_MAX_NEG_OFF    $(-0x200000) /* SKF_LL_OFF from filter.h */
 
 sk_load_word:
 	.globl	sk_load_word
 
+	test	%esi,%esi
+	js	bpf_slow_path_word_neg
+
+sk_load_word_positive_offset:
+	.globl	sk_load_word_positive_offset
+
 	mov	%r9d,%eax		# hlen
 	sub	%esi,%eax		# hlen - offset
 	cmp	$3,%eax
@@ -37,16 +37,15 @@ sk_load_word:
 	bswap   %eax  			/* ntohl() */
 	ret
 
-
-sk_load_half_ind:
-	.globl sk_load_half_ind
-
-	add	%ebx,%esi	/* offset += X */
-	js	bpf_error
-
 sk_load_half:
 	.globl	sk_load_half
 
+	test	%esi,%esi
+	js	bpf_slow_path_half_neg
+
+sk_load_half_positive_offset:
+	.globl	sk_load_half_positive_offset
+
 	mov	%r9d,%eax
 	sub	%esi,%eax		#	hlen - offset
 	cmp	$1,%eax
@@ -55,14 +54,15 @@ sk_load_half:
 	rol	$8,%ax			# ntohs()
 	ret
 
-sk_load_byte_ind:
-	.globl sk_load_byte_ind
-	add	%ebx,%esi	/* offset += X */
-	js	bpf_error
-
 sk_load_byte:
 	.globl	sk_load_byte
 
+	test	%esi,%esi
+	js	bpf_slow_path_byte_neg
+
+sk_load_byte_positive_offset:
+	.globl	sk_load_byte_positive_offset
+
 	cmp	%esi,%r9d   /* if (offset >= hlen) goto bpf_slow_path_byte */
 	jle	bpf_slow_path_byte
 	movzbl	(SKBDATA,%rsi),%eax
@@ -73,25 +73,21 @@ sk_load_byte:
  *
  * Implements BPF_S_LDX_B_MSH : ldxb  4*([offset]&0xf)
  * Must preserve A accumulator (%eax)
- * Inputs : %esi is the offset value, already known positive
+ * Inputs : %esi is the offset value
  */
-ENTRY(sk_load_byte_msh)
-	CFI_STARTPROC
+sk_load_byte_msh:
+	.globl	sk_load_byte_msh
+	test	%esi,%esi
+	js	bpf_slow_path_byte_msh_neg
+
+sk_load_byte_msh_positive_offset:
+	.globl	sk_load_byte_msh_positive_offset
 	cmp	%esi,%r9d      /* if (offset >= hlen) goto bpf_slow_path_byte_msh */
 	jle	bpf_slow_path_byte_msh
 	movzbl	(SKBDATA,%rsi),%ebx
 	and	$15,%bl
 	shl	$2,%bl
 	ret
-	CFI_ENDPROC
-ENDPROC(sk_load_byte_msh)
-
-bpf_error:
-# force a return 0 from jit handler
-	xor		%eax,%eax
-	mov		-8(%rbp),%rbx
-	leaveq
-	ret
 
 /* rsi contains offset and can be scratched */
 #define bpf_slow_path_common(LEN)		\
@@ -138,3 +134,68 @@ bpf_slow_path_byte_msh:
 	shl	$2,%al
 	xchg	%eax,%ebx
 	ret
+
+#define sk_negative_common(SIZE)				\
+	push	%rdi;	/* save skb */				\
+	push	%r9;						\
+	push	SKBDATA;					\
+/* rsi already has offset */					\
+	mov	$SIZE,%ecx;	/* size */			\
+	call	bpf_internal_load_pointer_neg_helper;		\
+	test	%rax,%rax;					\
+	pop	SKBDATA;					\
+	pop	%r9;						\
+	pop	%rdi;						\
+	jz	bpf_error
+
+
+bpf_slow_path_word_neg:
+	cmp	SKF_MAX_NEG_OFF, %esi	/* test range */
+	jl	bpf_error	/* offset lower -> error  */
+sk_load_word_negative_offset:
+	.globl	sk_load_word_negative_offset
+	sk_negative_common(4)
+	mov	(%rax), %eax
+	bswap	%eax
+	ret
+
+bpf_slow_path_half_neg:
+	cmp	SKF_MAX_NEG_OFF, %esi
+	jl	bpf_error
+sk_load_half_negative_offset:
+	.globl	sk_load_half_negative_offset
+	sk_negative_common(2)
+	mov	(%rax),%ax
+	rol	$8,%ax
+	movzwl	%ax,%eax
+	ret
+
+bpf_slow_path_byte_neg:
+	cmp	SKF_MAX_NEG_OFF, %esi
+	jl	bpf_error
+sk_load_byte_negative_offset:
+	.globl	sk_load_byte_negative_offset
+	sk_negative_common(1)
+	movzbl	(%rax), %eax
+	ret
+
+bpf_slow_path_byte_msh_neg:
+	cmp	SKF_MAX_NEG_OFF, %esi
+	jl	bpf_error
+sk_load_byte_msh_negative_offset:
+	.globl	sk_load_byte_msh_negative_offset
+	xchg	%eax,%ebx /* dont lose A , X is about to be scratched */
+	sk_negative_common(1)
+	movzbl	(%rax),%eax
+	and	$15,%al
+	shl	$2,%al
+	xchg	%eax,%ebx
+	ret
+
+bpf_error:
+# force a return 0 from jit handler
+	xor		%eax,%eax
+	mov		-8(%rbp),%rbx
+	leaveq
+	ret
+
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 5671752..39a2e2c 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -30,7 +30,10 @@ int bpf_jit_enable __read_mostly;
  * assembly code in arch/x86/net/bpf_jit.S
  */
 extern u8 sk_load_word[], sk_load_half[], sk_load_byte[], sk_load_byte_msh[];
-extern u8 sk_load_word_ind[], sk_load_half_ind[], sk_load_byte_ind[];
+extern u8 sk_load_word_positive_offset[], sk_load_half_positive_offset[];
+extern u8 sk_load_byte_positive_offset[], sk_load_byte_msh_positive_offset[];
+extern u8 sk_load_word_negative_offset[], sk_load_half_negative_offset[];
+extern u8 sk_load_byte_negative_offset[], sk_load_byte_msh_negative_offset[];
 
 static inline u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len)
 {
@@ -117,6 +120,8 @@ static inline void bpf_flush_icache(void *start, void *end)
 	set_fs(old_fs);
 }
 
+#define CHOOSE_LOAD_FUNC(K, func) \
+	((int)K < 0 ? ((int)K >= SKF_LL_OFF ? func##_negative_offset : func) : func##_positive_offset)
 
 void bpf_jit_compile(struct sk_filter *fp)
 {
@@ -473,44 +478,46 @@ void bpf_jit_compile(struct sk_filter *fp)
 #endif
 				break;
 			case BPF_S_LD_W_ABS:
-				func = sk_load_word;
+				func = CHOOSE_LOAD_FUNC(K, sk_load_word);
 common_load:			seen |= SEEN_DATAREF;
-				if ((int)K < 0) {
-					/* Abort the JIT because __load_pointer() is needed. */
-					goto out;
-				}
 				t_offset = func - (image + addrs[i]);
 				EMIT1_off32(0xbe, K); /* mov imm32,%esi */
 				EMIT1_off32(0xe8, t_offset); /* call */
 				break;
 			case BPF_S_LD_H_ABS:
-				func = sk_load_half;
+				func = CHOOSE_LOAD_FUNC(K, sk_load_half);
 				goto common_load;
 			case BPF_S_LD_B_ABS:
-				func = sk_load_byte;
+				func = CHOOSE_LOAD_FUNC(K, sk_load_byte);
 				goto common_load;
 			case BPF_S_LDX_B_MSH:
-				if ((int)K < 0) {
-					/* Abort the JIT because __load_pointer() is needed. */
-					goto out;
-				}
+				func = CHOOSE_LOAD_FUNC(K, sk_load_byte_msh);
 				seen |= SEEN_DATAREF | SEEN_XREG;
-				t_offset = sk_load_byte_msh - (image + addrs[i]);
+				t_offset = func - (image + addrs[i]);
 				EMIT1_off32(0xbe, K);	/* mov imm32,%esi */
 				EMIT1_off32(0xe8, t_offset); /* call sk_load_byte_msh */
 				break;
 			case BPF_S_LD_W_IND:
-				func = sk_load_word_ind;
+				func = sk_load_word;
 common_load_ind:		seen |= SEEN_DATAREF | SEEN_XREG;
 				t_offset = func - (image + addrs[i]);
-				EMIT1_off32(0xbe, K);	/* mov imm32,%esi   */
+				if (K) {
+					if (is_imm8(K)) {
+						EMIT3(0x8d, 0x73, K); /* lea imm8(%rbx), %esi */
+					} else {
+						EMIT2(0x8d, 0xb3); /* lea imm32(%rbx),%esi */
+						EMIT(K, 4);
+					}
+				} else {
+					EMIT2(0x89,0xde); /* mov %ebx,%esi */
+				}
 				EMIT1_off32(0xe8, t_offset);	/* call sk_load_xxx_ind */
 				break;
 			case BPF_S_LD_H_IND:
-				func = sk_load_half_ind;
+				func = sk_load_half;
 				goto common_load_ind;
 			case BPF_S_LD_B_IND:
-				func = sk_load_byte_ind;
+				func = sk_load_byte;
 				goto common_load_ind;
 			case BPF_S_JMP_JA:
 				t_offset = addrs[i + K] - addrs[i];
diff --git a/net/core/filter.c b/net/core/filter.c
index 5dea452..04ca613 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -41,7 +41,7 @@
 #include <linux/ratelimit.h>
 
 /* No hurry in this branch */
-static void *__load_pointer(const struct sk_buff *skb, int k, unsigned int size)
+void *bpf_internal_load_pointer_neg_helper(const struct sk_buff *skb, int k, unsigned int size)
 {
 	u8 *ptr = NULL;
 
@@ -54,13 +54,14 @@ static void *__load_pointer(const struct sk_buff *skb, int k, unsigned int size)
 		return ptr;
 	return NULL;
 }
+EXPORT_SYMBOL(bpf_internal_load_pointer_neg_helper);
 
 static inline void *load_pointer(const struct sk_buff *skb, int k,
 				 unsigned int size, void *buffer)
 {
 	if (k >= 0)
 		return skb_header_pointer(skb, k, size, buffer);
-	return __load_pointer(skb, k, size);
+	return bpf_internal_load_pointer_neg_helper(skb, k, size);
 }
 
 /**
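
A rough illustration of how the new CHOOSE_LOAD_FUNC() dispatch behaves may
help when reading the hunks above; the constants are the SKF_* values from
include/linux/filter.h, and the summary is a sketch, not part of the patch:

	/*
	 * Absolute load (e.g. BPF_S_LD_W_ABS) with constant K,
	 * with SKF_NET_OFF = -0x100000 and SKF_LL_OFF = -0x200000:
	 *
	 *   K >= 0               -> sk_load_word_positive_offset
	 *   SKF_LL_OFF <= K < 0  -> sk_load_word_negative_offset
	 *   K < SKF_LL_OFF       -> sk_load_word, whose run-time range
	 *                           check branches to bpf_error and the
	 *                           packet is dropped
	 *
	 * Indirect loads (BPF_S_LD_*_IND) always call the plain
	 * sk_load_{word,half,byte} entry, since X + K is only known at
	 * run time; the sign test at the top of those entry points then
	 * selects the positive- or negative-offset path.
	 */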




* Re: [REGRESSION][PATCH] bpf_jit drops the ball on indirect negative mem references
  2012-03-30 13:42           ` Jan Seiffert
@ 2012-03-30 14:08             ` Eric Dumazet
  2012-03-30 14:11               ` Josh Boyer
  2012-03-30 15:24               ` Matt Evans
  2012-03-30 15:19             ` Matt Evans
  1 sibling, 2 replies; 14+ messages in thread
From: Eric Dumazet @ 2012-03-30 14:08 UTC (permalink / raw)
  To: Jan Seiffert; +Cc: netdev, linux-kernel, David S. Miller, Matt Evans

On Fri, 2012-03-30 at 15:42 +0200, Jan Seiffert wrote:

> Do you know where i can ping the powerpc guys a little bit harder?

maybe using CC linux-arch@vger.kernel.org

One way to handle that would be to split this patch into 3 parts, so that
I can Ack the non-ppc part.

[1] introduce bpf_internal_load_pointer_neg_helper()
	[BTW no need to EXPORT_SYMBOL it, since jit is not a module --
	 a minimal sketch of this follows below ]

[2] x86_bpf_jit: handle negative offsets in loads

[3] ppc_bpf_jit: handle negative offsets in loads
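
A minimal sketch of what [1] could look like without the export (the header
placement here is an assumption, only the plain extern prototype is the
point):

	/* e.g. in include/linux/filter.h -- exact location is a guess */
	struct sk_buff;
	extern void *bpf_internal_load_pointer_neg_helper(const struct sk_buff *skb,
							  int k, unsigned int size);

	/* net/core/filter.c: the definition stays non-static, but without
	 * EXPORT_SYMBOL(), since the x86 and powerpc JITs that call it are
	 * always built into the kernel image, never as modules. */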





* Re: [REGRESSION][PATCH] bpf_jit drops the ball on indirect negative mem references
  2012-03-30 14:08             ` Eric Dumazet
@ 2012-03-30 14:11               ` Josh Boyer
  2012-03-30 15:24               ` Matt Evans
  1 sibling, 0 replies; 14+ messages in thread
From: Josh Boyer @ 2012-03-30 14:11 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: Jan Seiffert, netdev, linux-kernel, David S. Miller, Matt Evans

On Fri, Mar 30, 2012 at 10:08 AM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> On Fri, 2012-03-30 at 15:42 +0200, Jan Seiffert wrote:
>
>> Do you know where i can ping the powerpc guys a little bit harder?
>
> maybe using CC linux-arch@vger.kernel.org

Or linuxppc-dev@lists.ozlabs.org

josh


* Re: [REGRESSION][PATCH] bpf_jit drops the ball on indirect negative mem references
  2012-03-30 13:42           ` Jan Seiffert
  2012-03-30 14:08             ` Eric Dumazet
@ 2012-03-30 15:19             ` Matt Evans
  2012-03-30 15:51               ` Jan Seiffert
  1 sibling, 1 reply; 14+ messages in thread
From: Matt Evans @ 2012-03-30 15:19 UTC (permalink / raw)
  To: Jan Seiffert
  Cc: <netdev@vger.kernel.org>,
	Eric Dumazet, <linux-kernel@vger.kernel.org>,
	David S. Miller

Hi Jan,

On 30 Mar 2012, at 14:42, Jan Seiffert <kaffeemonster@googlemail.com> wrote:

> Eric Dumazet wrote:
>> [snip]
>> All these ".p2align 1" are noise for this patch.
>> 
>> This should be done as separate patch, explaining the rationale.
>> 
>> ...
> Ok, i thought since you were concerned with the performance and
> I'm touching this stuff anyway.
> But you are right, can be done separately.
> So gone.
> 
>> [snip]
>> 
>> Please add the code for imm8 offsets as well ?
>> 
>> if (is_imm8(K))
>>    EMIT3(0x8d, 0x73, K); /* lea imm8(%rbx),%esi */
>> else
>>    EMIT2_off32(0x8d, 0xb3, K); /* lea imm32(%rbx),%esi */
> 
> Right, there is this imm8 form. I left it out because i never saw gas emit it
> and totally forgot about it.
> Your wish is my command. But since there is no EMIT2_off32 and introducing
> it would mean additional cleanup noise, i stayed with two emits.
> 
> Do you know where i can ping the powerpc guys a little bit harder?

No need (unless you mean a different guy), I have enough guilt as it is! :) (Sorry for no response, am moving house and am netless.) I skimmed your patches on my phone but hope to be in a state to review/test over the weekend or early next week. :)


Cheers,

Matt


> 
> So here a v3 of the patch:
> 
> Consider the following test program:
> 
> #include <stdio.h>
> #include <sys/types.h>
> #include <sys/socket.h>
> #include <netinet/in.h>
> #include <pcap-bpf.h>
> 
> #define die(x) do {perror(x); return 1;} while (0)
> struct bpf_insn udp_filter[] = {
>    /*   0 */ BPF_STMT(BPF_LDX|BPF_W|BPF_IMM, -1048576+(0)), /* leax    net[0] */
>    /*   1 */ BPF_STMT(BPF_LD|BPF_B|BPF_IND, 0),             /* ldb    [x+0] */
>    /*   2 */ BPF_STMT(BPF_RET|BPF_A, 0),                    /* ret    a */
> };
> 
> int main(int argc, char *argv[])
> {
>    char buf[512];
>    struct sockaddr_in addr;
>    struct bpf_program prg;
>    socklen_t addr_s;
>    ssize_t res;
>    int fd;
> 
>    addr.sin_family = AF_INET;
>    addr.sin_port = htons(5000);
>    addr.sin_addr.s_addr = 0;
>    addr_s = sizeof(addr);
>    prg.bf_len = sizeof(udp_filter)/sizeof(udp_filter[0]);
>    prg.bf_insns = udp_filter;
>    if(-1 == (fd = socket(AF_INET, SOCK_DGRAM, 0)))
>        die("socket");
>    if(-1 == bind(fd, (struct sockaddr *)&addr, sizeof(addr)))
>        die("bind");
>    if(-1 == setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &prg, sizeof(prg)))
>        die("setsockopt");
>    res = recvfrom(fd, buf, sizeof(buf), 0, (struct sockaddr *)&addr, &addr_s);
>    if(res != -1)
>        printf("packet received: %zi bytes\n", res);
>    else
>        die("recvfrom");
>    return 0;
> }
> 
> when used with the bpf jit disabled works:
> console 1 $ ./bpf
> console 2 $ echo "hello" | nc -u localhost 5000
> console 1: packet received: 6 bytes
> 
> When the bpf jit gets enabled (echo 100 >
> /proc/sys/net/core/bpf_jit_enable) the same program stops working:
> console 1 $ ./bpf
> console 2 $ echo "hello" | nc -u localhost 5000
> console 1:
> 
> The reason is that both jits (x86 and powerpc) do not handle negative
> memory references like SKF_NET_OFF or SKF_LL_OFF, only the simple
> ancillary data references are supported (by mapping to special
> instructions).
> In the case of an absolute reference, the jit aborts the translation
> if a negative reference is seen, also a negative k on the indirect
> load aborts the translation, but if X is negative to begin with, only
> the error handler is reached at runtime which drops the whole packet.
> 
> I propose the following patch to fix this situation.
> Lightly tested on x86, but the powerpc asm part is probably wrong.
> 
> Signed-off-by: Jan Seiffert <kaffeemonster@googlemail.com>
> 
> diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h
> index af1ab5e..e9b57b3 100644
> --- a/arch/powerpc/net/bpf_jit.h
> +++ b/arch/powerpc/net/bpf_jit.h
> @@ -49,6 +49,10 @@
>  * Assembly helpers from arch/powerpc/net/bpf_jit.S:
>  */
> extern u8 sk_load_word[], sk_load_half[], sk_load_byte[], sk_load_byte_msh[];
> +extern u8 sk_load_word_positive_offset[], sk_load_half_positive_offset[];
> +extern u8 sk_load_byte_positive_offset[], sk_load_byte_msh_positive_offset[];
> +extern u8 sk_load_word_negative_offset[], sk_load_half_negative_offset[];
> +extern u8 sk_load_byte_negative_offset[], sk_load_byte_msh_negative_offset[];
> 
> #define FUNCTION_DESCR_SIZE    24
> 
> diff --git a/arch/powerpc/net/bpf_jit_64.S b/arch/powerpc/net/bpf_jit_64.S
> index ff4506e..e590aa5 100644
> --- a/arch/powerpc/net/bpf_jit_64.S
> +++ b/arch/powerpc/net/bpf_jit_64.S
> @@ -31,14 +31,13 @@
>  * then branch directly to slow_path_XXX if required.  (In fact, could
>  * load a spare GPR with the address of slow_path_generic and pass size
>  * as an argument, making the call site a mtlr, li and bllr.)
> - *
> - * Technically, the "is addr < 0" check is unnecessary & slowing down
> - * the ABS path, as it's statically checked on generation.
>  */
>    .globl    sk_load_word
> sk_load_word:
>    cmpdi    r_addr, 0
> -    blt    bpf_error
> +    blt    bpf_slow_path_word_neg
> +    .globl    sk_load_word_positive_offset
> +sk_load_word_positive_offset:
>    /* Are we accessing past headlen? */
>    subi    r_scratch1, r_HL, 4
>    cmpd    r_scratch1, r_addr
> @@ -51,7 +50,9 @@ sk_load_word:
>    .globl    sk_load_half
> sk_load_half:
>    cmpdi    r_addr, 0
> -    blt    bpf_error
> +    blt    bpf_slow_path_half_neg
> +    .globl    sk_load_half_positive_offset
> +sk_load_half_positive_offset:
>    subi    r_scratch1, r_HL, 2
>    cmpd    r_scratch1, r_addr
>    blt    bpf_slow_path_half
> @@ -61,7 +62,9 @@ sk_load_half:
>    .globl    sk_load_byte
> sk_load_byte:
>    cmpdi    r_addr, 0
> -    blt    bpf_error
> +    blt    bpf_slow_path_byte_neg
> +    .globl    sk_load_byte_positive_offset
> +sk_load_byte_positive_offset:
>    cmpd    r_HL, r_addr
>    ble    bpf_slow_path_byte
>    lbzx    r_A, r_D, r_addr
> @@ -69,22 +72,20 @@ sk_load_byte:
> 
> /*
>  * BPF_S_LDX_B_MSH: ldxb  4*([offset]&0xf)
> - * r_addr is the offset value, already known positive
> + * r_addr is the offset value
>  */
>    .globl sk_load_byte_msh
> sk_load_byte_msh:
> +    cmpdi    r_addr, 0
> +    blt    bpf_slow_path_byte_msh_neg
> +    .globl sk_load_byte_msh_positive_offset
> +sk_load_byte_msh_positive_offset:
>    cmpd    r_HL, r_addr
>    ble    bpf_slow_path_byte_msh
>    lbzx    r_X, r_D, r_addr
>    rlwinm    r_X, r_X, 2, 32-4-2, 31-2
>    blr
> 
> -bpf_error:
> -    /* Entered with cr0 = lt */
> -    li    r3, 0
> -    /* Generated code will 'blt epilogue', returning 0. */
> -    blr
> -
> /* Call out to skb_copy_bits:
>  * We'll need to back up our volatile regs first; we have
>  * local variable space at r1+(BPF_PPC_STACK_BASIC).
> @@ -136,3 +137,85 @@ bpf_slow_path_byte_msh:
>    lbz    r_X, BPF_PPC_STACK_BASIC+(2*8)(r1)
>    rlwinm    r_X, r_X, 2, 32-4-2, 31-2
>    blr
> +
> +/* Call out to bpf_internal_load_pointer_neg_helper:
> + * We'll need to back up our volatile regs first; we have
> + * local variable space at r1+(BPF_PPC_STACK_BASIC).
> + * Allocate a new stack frame here to remain ABI-compliant in
> + * stashing LR.
> + */
> +#define sk_negative_common(SIZE)                \
> +    mflr    r0;                        \
> +    std    r0, 16(r1);                    \
> +    /* R3 goes in parameter space of caller's frame */    \
> +    std    r_skb, (BPF_PPC_STACKFRAME+48)(r1);        \
> +    std    r_A, (BPF_PPC_STACK_BASIC+(0*8))(r1);        \
> +    std    r_X, (BPF_PPC_STACK_BASIC+(1*8))(r1);        \
> +    stdu    r1, -BPF_PPC_SLOWPATH_FRAME(r1);        \
> +    /* R3 = r_skb, as passed */                \
> +    mr    r4, r_addr;                    \
> +    li    r5, SIZE;                    \
> +    bl    bpf_internal_load_pointer_neg_helper;        \
> +    /* R3 != 0 on success */                \
> +    addi    r1, r1, BPF_PPC_SLOWPATH_FRAME;            \
> +    ld    r0, 16(r1);                    \
> +    ld    r_A, (BPF_PPC_STACK_BASIC+(0*8))(r1);        \
> +    ld    r_X, (BPF_PPC_STACK_BASIC+(1*8))(r1);        \
> +    mtlr    r0;                        \
> +    cmpldi    r3, 0;                        \
> +    beq    bpf_error_slow;    /* cr0 = EQ */            \
> +    mr    r_addr, r3;                    \
> +    ld    r_skb, (BPF_PPC_STACKFRAME+48)(r1);        \
> +    /* Great success! */
> +
> +bpf_slow_path_word_neg:
> +    lis     r_scratch1,-32    /* SKF_LL_OFF */
> +    cmpd    r_addr, r_scratch1    /* addr < SKF_* */
> +    blt    bpf_error    /* cr0 = LT */
> +    .globl    sk_load_word_negative_offset
> +sk_load_word_negative_offset:
> +    sk_negative_common(4)
> +    lwz    r_A, 0(r_addr)
> +    blr
> +
> +bpf_slow_path_half_neg:
> +    lis     r_scratch1,-32    /* SKF_LL_OFF */
> +    cmpd    r_addr, r_scratch1    /* addr < SKF_* */
> +    blt    bpf_error    /* cr0 = LT */
> +    .globl    sk_load_half_negative_offset
> +sk_load_half_negative_offset:
> +    sk_negative_common(2)
> +    lhz    r_A, 0(r_addr)
> +    blr
> +
> +bpf_slow_path_byte_neg:
> +    lis     r_scratch1,-32    /* SKF_LL_OFF */
> +    cmpd    r_addr, r_scratch1    /* addr < SKF_* */
> +    blt    bpf_error    /* cr0 = LT */
> +    .globl    sk_load_byte_negative_offset
> +sk_load_byte_negative_offset:
> +    sk_negative_common(1)
> +    lbz    r_A, 0(r_addr)
> +    blr
> +
> +bpf_slow_path_byte_msh_neg:
> +    lis     r_scratch1,-32    /* SKF_LL_OFF */
> +    cmpd    r_addr, r_scratch1    /* addr < SKF_* */
> +    blt    bpf_error    /* cr0 = LT */
> +    .globl    sk_load_byte_msh_negative_offset
> +sk_load_byte_msh_negative_offset:
> +    sk_negative_common(1)
> +    lbz    r_X, 0(r_addr)
> +    rlwinm    r_X, r_X, 2, 32-4-2, 31-2
> +    blr
> +
> +bpf_error_slow:
> +    /* fabricate a cr0 = lt */
> +    li    r_scratch1, -1
> +    cmpdi    r_scratch1, 0
> +bpf_error:
> +    /* Entered with cr0 = lt */
> +    li    r3, 0
> +    /* Generated code will 'blt epilogue', returning 0. */
> +    blr
> +
> diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
> index 73619d3..2dc8b14 100644
> --- a/arch/powerpc/net/bpf_jit_comp.c
> +++ b/arch/powerpc/net/bpf_jit_comp.c
> @@ -127,6 +127,9 @@ static void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx)
>    PPC_BLR();
> }
> 
> +#define CHOOSE_LOAD_FUNC(K, func) \
> +    ((int)K < 0 ? ((int)K >= SKF_LL_OFF ? func##_negative_offset : func) : func##_positive_offset)
> +
> /* Assemble the body code between the prologue & epilogue. */
> static int bpf_jit_build_body(struct sk_filter *fp, u32 *image,
>                  struct codegen_context *ctx,
> @@ -391,21 +394,16 @@ static int bpf_jit_build_body(struct sk_filter *fp, u32 *image,
> 
>            /*** Absolute loads from packet header/data ***/
>        case BPF_S_LD_W_ABS:
> -            func = sk_load_word;
> +            func = CHOOSE_LOAD_FUNC(K, sk_load_word);
>            goto common_load;
>        case BPF_S_LD_H_ABS:
> -            func = sk_load_half;
> +            func = CHOOSE_LOAD_FUNC(K, sk_load_half);
>            goto common_load;
>        case BPF_S_LD_B_ABS:
> -            func = sk_load_byte;
> +            func = CHOOSE_LOAD_FUNC(K, sk_load_byte);
>        common_load:
> -            /*
> -             * Load from [K].  Reference with the (negative)
> -             * SKF_NET_OFF/SKF_LL_OFF offsets is unsupported.
> -             */
> +            /* Load from [K]. */
>            ctx->seen |= SEEN_DATAREF;
> -            if ((int)K < 0)
> -                return -ENOTSUPP;
>            PPC_LI64(r_scratch1, func);
>            PPC_MTLR(r_scratch1);
>            PPC_LI32(r_addr, K);
> @@ -429,7 +427,7 @@ static int bpf_jit_build_body(struct sk_filter *fp, u32 *image,
>        common_load_ind:
>            /*
>             * Load from [X + K].  Negative offsets are tested for
> -             * in the helper functions, and result in a 'ret 0'.
> +             * in the helper functions.
>             */
>            ctx->seen |= SEEN_DATAREF | SEEN_XREG;
>            PPC_LI64(r_scratch1, func);
> @@ -443,13 +441,7 @@ static int bpf_jit_build_body(struct sk_filter *fp, u32 *image,
>            break;
> 
>        case BPF_S_LDX_B_MSH:
> -            /*
> -             * x86 version drops packet (RET 0) when K<0, whereas
> -             * interpreter does allow K<0 (__load_pointer, special
> -             * ancillary data).  common_load returns ENOTSUPP if K<0,
> -             * so we fall back to interpreter & filter works.
> -             */
> -            func = sk_load_byte_msh;
> +            func = CHOOSE_LOAD_FUNC(K, sk_load_byte_msh);
>            goto common_load;
>            break;
> 
> diff --git a/arch/x86/net/bpf_jit.S b/arch/x86/net/bpf_jit.S
> index 6687022..2897d7f 100644
> --- a/arch/x86/net/bpf_jit.S
> +++ b/arch/x86/net/bpf_jit.S
> @@ -18,17 +18,17 @@
>  * r9d : hlen = skb->len - skb->data_len
>  */
> #define SKBDATA    %r8
> -
> -sk_load_word_ind:
> -    .globl    sk_load_word_ind
> -
> -    add    %ebx,%esi    /* offset += X */
> -#    test    %esi,%esi    /* if (offset < 0) goto bpf_error; */
> -    js    bpf_error
> +#define SKF_MAX_NEG_OFF    $(-0x200000) /* SKF_LL_OFF from filter.h */
> 
> sk_load_word:
>    .globl    sk_load_word
> 
> +    test    %esi,%esi
> +    js    bpf_slow_path_word_neg
> +
> +sk_load_word_positive_offset:
> +    .globl    sk_load_word_positive_offset
> +
>    mov    %r9d,%eax        # hlen
>    sub    %esi,%eax        # hlen - offset
>    cmp    $3,%eax
> @@ -37,16 +37,15 @@ sk_load_word:
>    bswap   %eax              /* ntohl() */
>    ret
> 
> -
> -sk_load_half_ind:
> -    .globl sk_load_half_ind
> -
> -    add    %ebx,%esi    /* offset += X */
> -    js    bpf_error
> -
> sk_load_half:
>    .globl    sk_load_half
> 
> +    test    %esi,%esi
> +    js    bpf_slow_path_half_neg
> +
> +sk_load_half_positive_offset:
> +    .globl    sk_load_half_positive_offset
> +
>    mov    %r9d,%eax
>    sub    %esi,%eax        #    hlen - offset
>    cmp    $1,%eax
> @@ -55,14 +54,15 @@ sk_load_half:
>    rol    $8,%ax            # ntohs()
>    ret
> 
> -sk_load_byte_ind:
> -    .globl sk_load_byte_ind
> -    add    %ebx,%esi    /* offset += X */
> -    js    bpf_error
> -
> sk_load_byte:
>    .globl    sk_load_byte
> 
> +    test    %esi,%esi
> +    js    bpf_slow_path_byte_neg
> +
> +sk_load_byte_positive_offset:
> +    .globl    sk_load_byte_positive_offset
> +
>    cmp    %esi,%r9d   /* if (offset >= hlen) goto bpf_slow_path_byte */
>    jle    bpf_slow_path_byte
>    movzbl    (SKBDATA,%rsi),%eax
> @@ -73,25 +73,21 @@ sk_load_byte:
>  *
>  * Implements BPF_S_LDX_B_MSH : ldxb  4*([offset]&0xf)
>  * Must preserve A accumulator (%eax)
> - * Inputs : %esi is the offset value, already known positive
> + * Inputs : %esi is the offset value
>  */
> -ENTRY(sk_load_byte_msh)
> -    CFI_STARTPROC
> +sk_load_byte_msh:
> +    .globl    sk_load_byte_msh
> +    test    %esi,%esi
> +    js    bpf_slow_path_byte_msh_neg
> +
> +sk_load_byte_msh_positive_offset:
> +    .globl    sk_load_byte_msh_positive_offset
>    cmp    %esi,%r9d      /* if (offset >= hlen) goto bpf_slow_path_byte_msh */
>    jle    bpf_slow_path_byte_msh
>    movzbl    (SKBDATA,%rsi),%ebx
>    and    $15,%bl
>    shl    $2,%bl
>    ret
> -    CFI_ENDPROC
> -ENDPROC(sk_load_byte_msh)
> -
> -bpf_error:
> -# force a return 0 from jit handler
> -    xor        %eax,%eax
> -    mov        -8(%rbp),%rbx
> -    leaveq
> -    ret
> 
> /* rsi contains offset and can be scratched */
> #define bpf_slow_path_common(LEN)        \
> @@ -138,3 +134,68 @@ bpf_slow_path_byte_msh:
>    shl    $2,%al
>    xchg    %eax,%ebx
>    ret
> +
> +#define sk_negative_common(SIZE)                \
> +    push    %rdi;    /* save skb */                \
> +    push    %r9;                        \
> +    push    SKBDATA;                    \
> +/* rsi already has offset */                    \
> +    mov    $SIZE,%ecx;    /* size */            \
> +    call    bpf_internal_load_pointer_neg_helper;        \
> +    test    %rax,%rax;                    \
> +    pop    SKBDATA;                    \
> +    pop    %r9;                        \
> +    pop    %rdi;                        \
> +    jz    bpf_error
> +
> +
> +bpf_slow_path_word_neg:
> +    cmp    SKF_MAX_NEG_OFF, %esi    /* test range */
> +    jl    bpf_error    /* offset lower -> error  */
> +sk_load_word_negative_offset:
> +    .globl    sk_load_word_negative_offset
> +    sk_negative_common(4)
> +    mov    (%rax), %eax
> +    bswap    %eax
> +    ret
> +
> +bpf_slow_path_half_neg:
> +    cmp    SKF_MAX_NEG_OFF, %esi
> +    jl    bpf_error
> +sk_load_half_negative_offset:
> +    .globl    sk_load_half_negative_offset
> +    sk_negative_common(2)
> +    mov    (%rax),%ax
> +    rol    $8,%ax
> +    movzwl    %ax,%eax
> +    ret
> +
> +bpf_slow_path_byte_neg:
> +    cmp    SKF_MAX_NEG_OFF, %esi
> +    jl    bpf_error
> +sk_load_byte_negative_offset:
> +    .globl    sk_load_byte_negative_offset
> +    sk_negative_common(1)
> +    movzbl    (%rax), %eax
> +    ret
> +
> +bpf_slow_path_byte_msh_neg:
> +    cmp    SKF_MAX_NEG_OFF, %esi
> +    jl    bpf_error
> +sk_load_byte_msh_negative_offset:
> +    .globl    sk_load_byte_msh_negative_offset
> +    xchg    %eax,%ebx /* dont lose A , X is about to be scratched */
> +    sk_negative_common(1)
> +    movzbl    (%rax),%eax
> +    and    $15,%al
> +    shl    $2,%al
> +    xchg    %eax,%ebx
> +    ret
> +
> +bpf_error:
> +# force a return 0 from jit handler
> +    xor        %eax,%eax
> +    mov        -8(%rbp),%rbx
> +    leaveq
> +    ret
> +
> diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
> index 5671752..39a2e2c 100644
> --- a/arch/x86/net/bpf_jit_comp.c
> +++ b/arch/x86/net/bpf_jit_comp.c
> @@ -30,7 +30,10 @@ int bpf_jit_enable __read_mostly;
>  * assembly code in arch/x86/net/bpf_jit.S
>  */
> extern u8 sk_load_word[], sk_load_half[], sk_load_byte[], sk_load_byte_msh[];
> -extern u8 sk_load_word_ind[], sk_load_half_ind[], sk_load_byte_ind[];
> +extern u8 sk_load_word_positive_offset[], sk_load_half_positive_offset[];
> +extern u8 sk_load_byte_positive_offset[], sk_load_byte_msh_positive_offset[];
> +extern u8 sk_load_word_negative_offset[], sk_load_half_negative_offset[];
> +extern u8 sk_load_byte_negative_offset[], sk_load_byte_msh_negative_offset[];
> 
> static inline u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len)
> {
> @@ -117,6 +120,8 @@ static inline void bpf_flush_icache(void *start, void *end)
>    set_fs(old_fs);
> }
> 
> +#define CHOOSE_LOAD_FUNC(K, func) \
> +    ((int)K < 0 ? ((int)K >= SKF_LL_OFF ? func##_negative_offset : func) : func##_positive_offset)
> 
> void bpf_jit_compile(struct sk_filter *fp)
> {
> @@ -473,44 +478,46 @@ void bpf_jit_compile(struct sk_filter *fp)
> #endif
>                break;
>            case BPF_S_LD_W_ABS:
> -                func = sk_load_word;
> +                func = CHOOSE_LOAD_FUNC(K, sk_load_word);
> common_load:            seen |= SEEN_DATAREF;
> -                if ((int)K < 0) {
> -                    /* Abort the JIT because __load_pointer() is needed. */
> -                    goto out;
> -                }
>                t_offset = func - (image + addrs[i]);
>                EMIT1_off32(0xbe, K); /* mov imm32,%esi */
>                EMIT1_off32(0xe8, t_offset); /* call */
>                break;
>            case BPF_S_LD_H_ABS:
> -                func = sk_load_half;
> +                func = CHOOSE_LOAD_FUNC(K, sk_load_half);
>                goto common_load;
>            case BPF_S_LD_B_ABS:
> -                func = sk_load_byte;
> +                func = CHOOSE_LOAD_FUNC(K, sk_load_byte);
>                goto common_load;
>            case BPF_S_LDX_B_MSH:
> -                if ((int)K < 0) {
> -                    /* Abort the JIT because __load_pointer() is needed. */
> -                    goto out;
> -                }
> +                func = CHOOSE_LOAD_FUNC(K, sk_load_byte_msh);
>                seen |= SEEN_DATAREF | SEEN_XREG;
> -                t_offset = sk_load_byte_msh - (image + addrs[i]);
> +                t_offset = func - (image + addrs[i]);
>                EMIT1_off32(0xbe, K);    /* mov imm32,%esi */
>                EMIT1_off32(0xe8, t_offset); /* call sk_load_byte_msh */
>                break;
>            case BPF_S_LD_W_IND:
> -                func = sk_load_word_ind;
> +                func = sk_load_word;
> common_load_ind:        seen |= SEEN_DATAREF | SEEN_XREG;
>                t_offset = func - (image + addrs[i]);
> -                EMIT1_off32(0xbe, K);    /* mov imm32,%esi   */
> +                if (K) {
> +                    if (is_imm8(K)) {
> +                        EMIT3(0x8d, 0x73, K); /* lea imm8(%rbx), %esi */
> +                    } else {
> +                        EMIT2(0x8d, 0xb3); /* lea imm32(%rbx),%esi */
> +                        EMIT(K, 4);
> +                    }
> +                } else {
> +                    EMIT2(0x89,0xde); /* mov %ebx,%esi */
> +                }
>                EMIT1_off32(0xe8, t_offset);    /* call sk_load_xxx_ind */
>                break;
>            case BPF_S_LD_H_IND:
> -                func = sk_load_half_ind;
> +                func = sk_load_half;
>                goto common_load_ind;
>            case BPF_S_LD_B_IND:
> -                func = sk_load_byte_ind;
> +                func = sk_load_byte;
>                goto common_load_ind;
>            case BPF_S_JMP_JA:
>                t_offset = addrs[i + K] - addrs[i];
> diff --git a/net/core/filter.c b/net/core/filter.c
> index 5dea452..04ca613 100644
> --- a/net/core/filter.c
> +++ b/net/core/filter.c
> @@ -41,7 +41,7 @@
> #include <linux/ratelimit.h>
> 
> /* No hurry in this branch */
> -static void *__load_pointer(const struct sk_buff *skb, int k, unsigned int size)
> +void *bpf_internal_load_pointer_neg_helper(const struct sk_buff *skb, int k, unsigned int size)
> {
>    u8 *ptr = NULL;
> 
> @@ -54,13 +54,14 @@ static void *__load_pointer(const struct sk_buff *skb, int k, unsigned int size)
>        return ptr;
>    return NULL;
> }
> +EXPORT_SYMBOL(bpf_internal_load_pointer_neg_helper);
> 
> static inline void *load_pointer(const struct sk_buff *skb, int k,
>                 unsigned int size, void *buffer)
> {
>    if (k >= 0)
>        return skb_header_pointer(skb, k, size, buffer);
> -    return __load_pointer(skb, k, size);
> +    return bpf_internal_load_pointer_neg_helper(skb, k, size);
> }
> 
> /**
> 


* Re: [REGRESSION][PATCH] bpf_jit drops the ball on indirect negative mem references
  2012-03-30 14:08             ` Eric Dumazet
  2012-03-30 14:11               ` Josh Boyer
@ 2012-03-30 15:24               ` Matt Evans
  1 sibling, 0 replies; 14+ messages in thread
From: Matt Evans @ 2012-03-30 15:24 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: Jan Seiffert, netdev, linux-kernel, David S. Miller

Howdy Eric,

On 30 Mar 2012, at 15:08, Eric Dumazet <eric.dumazet@gmail.com> wrote:

> On Fri, 2012-03-30 at 15:42 +0200, Jan Seiffert wrote:
> 
>> Do you know where i can ping the powerpc guys a little bit harder?
> 
> maybe using CC linux-arch@vger.kernel.org
> 
> One way to handle that would be to split this patch in 3 parts, so that
> I can Ack the non ppc part.
> 
> [1] introduce bpf_internal_load_pointer_neg_helper()
>    [BTW no need to EXPORT_SYMBOL it, since jit is not a module ]
> 
> [2] x86_bpf_iit: handle negative offsets in loads
> 
> [3] ppc_bpf_jit: handle negative offsets in loads

Yeah, that'd be great, then I can stew on it without holding you up further. 

Cheers,

Matt



> 
> 


* Re: [REGRESSION][PATCH] bpf_jit drops the ball on indirect negative mem references
  2012-03-30 15:19             ` Matt Evans
@ 2012-03-30 15:51               ` Jan Seiffert
  0 siblings, 0 replies; 14+ messages in thread
From: Jan Seiffert @ 2012-03-30 15:51 UTC (permalink / raw)
  To: Matt Evans
  Cc: <netdev@vger.kernel.org>,
	Eric Dumazet, <linux-kernel@vger.kernel.org>

Matt Evans wrote:
> Hi Jan,
> 
> On 30 Mar 2012, at 14:42, Jan Seiffert <kaffeemonster@googlemail.com> wrote:
> 
[snip]
>> Do you know where i can ping the powerpc guys a little bit harder?
> 
> No need (unless you mean a different guy), I have enough guilt as it
> is! :) (Sorry for no response, am moving house and am netless.)

Oh, ok, didn't know.

> I skimmed your patches on my phone but hope to be in a state to 
> review/test over the weekend or early next week. :)>> 

It would be great if you could review it.
By "pinging harder" i meant getting anyone fluent in powerpc asm to look
at the changes i made in the asm. I think the stack setup I've done is wrong.
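
For reference when reviewing sk_negative_common(): the 64-bit PowerPC ELF
ABI frame header (offsets from the stack pointer) is, summarised from the
ABI document rather than from this patch:

	  0  back chain (pointer to previous frame)
	  8  CR save
	 16  LR save
	 24  reserved (compiler)
	 32  reserved (linker)
	 40  TOC save
	 48  parameter save area

The macro saves LR at 16(r1), stashes r_skb in the parameter save area of
the frame above the generated code's own frame (BPF_PPC_STACKFRAME+48),
and only then opens the new BPF_PPC_SLOWPATH_FRAME with stdu before calling
bpf_internal_load_pointer_neg_helper().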

> Cheers,
> 
> Matt
> 

Greetings
	Jan

> 
[snip]

-- 
Real programmers are surprised when their
odometer does not flip over from 000009
to 00000A.


end of thread (~2012-03-30 16:54 UTC)

Thread overview: 14+ messages
2012-03-28 19:15 [REGRESSION][PATCH] bpf_jit drops the ball on indirect negative mem references Jan Seiffert
2012-03-28 20:05 ` Eric Dumazet
2012-03-28 20:26   ` Jan Seiffert
2012-03-28 20:39     ` Eric Dumazet
2012-03-29 11:54       ` Jan Seiffert
2012-03-29 13:57         ` Eric Dumazet
2012-03-30  9:11         ` Eric Dumazet
2012-03-30 13:42           ` Jan Seiffert
2012-03-30 14:08             ` Eric Dumazet
2012-03-30 14:11               ` Josh Boyer
2012-03-30 15:24               ` Matt Evans
2012-03-30 15:19             ` Matt Evans
2012-03-30 15:51               ` Jan Seiffert
2012-03-28 20:58 ` David Miller
