From: Hagen Paul Pfeifer <hagen@jauu.net>
To: netdev@vger.kernel.org
Cc: davem@davemloft.net, Hagen Paul Pfeifer <hagen@jauu.net>
Subject: [PATCH] net: optimize Berkeley Packet Filter (BPF) processing
Date: Sun, 20 Jun 2010 05:05:36 +0200 [thread overview]
Message-ID: <1277003136-5522-1-git-send-email-hagen@jauu.net> (raw)
Gcc is currenlty not in the ability to optimize the switch statement in
sk_run_filter() because of dense case labels. This patch replace the
OR'd labels with ordered sequenced case labels. The sk_chk_filter()
function is modified to patch/replace the original OPCODES in a
ordered but equivalent form. gcc is now in the ability to transform the
switch statement in sk_run_filter into a jump table of complexity O(1).
Until this patch gcc generates a sequence of conditional branches (O(n) of 567
byte .text segment size (arch x86_64):
7ff: 8b 06 mov (%rsi),%eax
801: 66 83 f8 35 cmp $0x35,%ax
805: 0f 84 d0 02 00 00 je adb <sk_run_filter+0x31d>
80b: 0f 87 07 01 00 00 ja 918 <sk_run_filter+0x15a>
811: 66 83 f8 15 cmp $0x15,%ax
815: 0f 84 c5 02 00 00 je ae0 <sk_run_filter+0x322>
81b: 77 73 ja 890 <sk_run_filter+0xd2>
81d: 66 83 f8 04 cmp $0x4,%ax
821: 0f 84 17 02 00 00 je a3e <sk_run_filter+0x280>
827: 77 29 ja 852 <sk_run_filter+0x94>
829: 66 83 f8 01 cmp $0x1,%ax
[...]
With the modification the compiler translate the switch statement into
the following jump table fragment:
7ff: 66 83 3e 2c cmpw $0x2c,(%rsi)
803: 0f 87 1f 02 00 00 ja a28 <sk_run_filter+0x26a>
809: 0f b7 06 movzwl (%rsi),%eax
80c: ff 24 c5 00 00 00 00 jmpq *0x0(,%rax,8)
813: 44 89 e3 mov %r12d,%ebx
816: e9 43 03 00 00 jmpq b5e <sk_run_filter+0x3a0>
81b: 41 89 dc mov %ebx,%r12d
81e: e9 3b 03 00 00 jmpq b5e <sk_run_filter+0x3a0>
Furthermore, I reordered the instructions to reduce cache line misses by
order the most common instruction to the start.
Signed-off-by: Hagen Paul Pfeifer <hagen@jauu.net>
---
include/linux/filter.h | 48 +++++++++++
net/core/filter.c | 212 ++++++++++++++++++++++++++++++++++++------------
2 files changed, 209 insertions(+), 51 deletions(-)
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 151f5d7..69b43db 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -91,6 +91,54 @@ struct sock_fprog { /* Required for SO_ATTACH_FILTER. */
#define BPF_TAX 0x00
#define BPF_TXA 0x80
+enum {
+ BPF_S_RET_K = 0,
+ BPF_S_RET_A,
+ BPF_S_ALU_ADD_K,
+ BPF_S_ALU_ADD_X,
+ BPF_S_ALU_SUB_K,
+ BPF_S_ALU_SUB_X,
+ BPF_S_ALU_MUL_K,
+ BPF_S_ALU_MUL_X,
+ BPF_S_ALU_DIV_X,
+ BPF_S_ALU_AND_K,
+ BPF_S_ALU_AND_X,
+ BPF_S_ALU_OR_K,
+ BPF_S_ALU_OR_X,
+ BPF_S_ALU_LSH_K,
+ BPF_S_ALU_LSH_X,
+ BPF_S_ALU_RSH_K,
+ BPF_S_ALU_RSH_X,
+ BPF_S_ALU_NEG,
+ BPF_S_LD_W_ABS,
+ BPF_S_LD_H_ABS,
+ BPF_S_LD_B_ABS,
+ BPF_S_LD_W_LEN,
+ BPF_S_LD_W_IND,
+ BPF_S_LD_H_IND,
+ BPF_S_LD_B_IND,
+ BPF_S_LD_IMM,
+ BPF_S_LDX_W_LEN,
+ BPF_S_LDX_B_MSH,
+ BPF_S_LDX_IMM,
+ BPF_S_MISC_TAX,
+ BPF_S_MISC_TXA,
+ BPF_S_ALU_DIV_K,
+ BPF_S_LD_MEM,
+ BPF_S_LDX_MEM,
+ BPF_S_ST,
+ BPF_S_STX,
+ BPF_S_JMP_JA,
+ BPF_S_JMP_JEQ_K,
+ BPF_S_JMP_JEQ_X,
+ BPF_S_JMP_JGE_K,
+ BPF_S_JMP_JGE_X,
+ BPF_S_JMP_JGT_K,
+ BPF_S_JMP_JGT_X,
+ BPF_S_JMP_JSET_K,
+ BPF_S_JMP_JSET_X,
+};
+
#ifndef BPF_MAXINSNS
#define BPF_MAXINSNS 4096
#endif
diff --git a/net/core/filter.c b/net/core/filter.c
index da69fb7..6e3e322 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -128,87 +128,87 @@ unsigned int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int
fentry = &filter[pc];
switch (fentry->code) {
- case BPF_ALU|BPF_ADD|BPF_X:
+ case BPF_S_ALU_ADD_X:
A += X;
continue;
- case BPF_ALU|BPF_ADD|BPF_K:
+ case BPF_S_ALU_ADD_K:
A += fentry->k;
continue;
- case BPF_ALU|BPF_SUB|BPF_X:
+ case BPF_S_ALU_SUB_X:
A -= X;
continue;
- case BPF_ALU|BPF_SUB|BPF_K:
+ case BPF_S_ALU_SUB_K:
A -= fentry->k;
continue;
- case BPF_ALU|BPF_MUL|BPF_X:
+ case BPF_S_ALU_MUL_X:
A *= X;
continue;
- case BPF_ALU|BPF_MUL|BPF_K:
+ case BPF_S_ALU_MUL_K:
A *= fentry->k;
continue;
- case BPF_ALU|BPF_DIV|BPF_X:
+ case BPF_S_ALU_DIV_X:
if (X == 0)
return 0;
A /= X;
continue;
- case BPF_ALU|BPF_DIV|BPF_K:
+ case BPF_S_ALU_DIV_K:
A /= fentry->k;
continue;
- case BPF_ALU|BPF_AND|BPF_X:
+ case BPF_S_ALU_AND_X:
A &= X;
continue;
- case BPF_ALU|BPF_AND|BPF_K:
+ case BPF_S_ALU_AND_K:
A &= fentry->k;
continue;
- case BPF_ALU|BPF_OR|BPF_X:
+ case BPF_S_ALU_OR_X:
A |= X;
continue;
- case BPF_ALU|BPF_OR|BPF_K:
+ case BPF_S_ALU_OR_K:
A |= fentry->k;
continue;
- case BPF_ALU|BPF_LSH|BPF_X:
+ case BPF_S_ALU_LSH_X:
A <<= X;
continue;
- case BPF_ALU|BPF_LSH|BPF_K:
+ case BPF_S_ALU_LSH_K:
A <<= fentry->k;
continue;
- case BPF_ALU|BPF_RSH|BPF_X:
+ case BPF_S_ALU_RSH_X:
A >>= X;
continue;
- case BPF_ALU|BPF_RSH|BPF_K:
+ case BPF_S_ALU_RSH_K:
A >>= fentry->k;
continue;
- case BPF_ALU|BPF_NEG:
+ case BPF_S_ALU_NEG:
A = -A;
continue;
- case BPF_JMP|BPF_JA:
+ case BPF_S_JMP_JA:
pc += fentry->k;
continue;
- case BPF_JMP|BPF_JGT|BPF_K:
+ case BPF_S_JMP_JGT_K:
pc += (A > fentry->k) ? fentry->jt : fentry->jf;
continue;
- case BPF_JMP|BPF_JGE|BPF_K:
+ case BPF_S_JMP_JGE_K:
pc += (A >= fentry->k) ? fentry->jt : fentry->jf;
continue;
- case BPF_JMP|BPF_JEQ|BPF_K:
+ case BPF_S_JMP_JEQ_K:
pc += (A == fentry->k) ? fentry->jt : fentry->jf;
continue;
- case BPF_JMP|BPF_JSET|BPF_K:
+ case BPF_S_JMP_JSET_K:
pc += (A & fentry->k) ? fentry->jt : fentry->jf;
continue;
- case BPF_JMP|BPF_JGT|BPF_X:
+ case BPF_S_JMP_JGT_X:
pc += (A > X) ? fentry->jt : fentry->jf;
continue;
- case BPF_JMP|BPF_JGE|BPF_X:
+ case BPF_S_JMP_JGE_X:
pc += (A >= X) ? fentry->jt : fentry->jf;
continue;
- case BPF_JMP|BPF_JEQ|BPF_X:
+ case BPF_S_JMP_JEQ_X:
pc += (A == X) ? fentry->jt : fentry->jf;
continue;
- case BPF_JMP|BPF_JSET|BPF_X:
+ case BPF_S_JMP_JSET_X:
pc += (A & X) ? fentry->jt : fentry->jf;
continue;
- case BPF_LD|BPF_W|BPF_ABS:
+ case BPF_S_LD_W_ABS:
k = fentry->k;
load_w:
ptr = load_pointer(skb, k, 4, &tmp);
@@ -217,7 +217,7 @@ load_w:
continue;
}
break;
- case BPF_LD|BPF_H|BPF_ABS:
+ case BPF_S_LD_H_ABS:
k = fentry->k;
load_h:
ptr = load_pointer(skb, k, 2, &tmp);
@@ -226,7 +226,7 @@ load_h:
continue;
}
break;
- case BPF_LD|BPF_B|BPF_ABS:
+ case BPF_S_LD_B_ABS:
k = fentry->k;
load_b:
ptr = load_pointer(skb, k, 1, &tmp);
@@ -235,54 +235,54 @@ load_b:
continue;
}
break;
- case BPF_LD|BPF_W|BPF_LEN:
+ case BPF_S_LD_W_LEN:
A = skb->len;
continue;
- case BPF_LDX|BPF_W|BPF_LEN:
+ case BPF_S_LDX_W_LEN:
X = skb->len;
continue;
- case BPF_LD|BPF_W|BPF_IND:
+ case BPF_S_LD_W_IND:
k = X + fentry->k;
goto load_w;
- case BPF_LD|BPF_H|BPF_IND:
+ case BPF_S_LD_H_IND:
k = X + fentry->k;
goto load_h;
- case BPF_LD|BPF_B|BPF_IND:
+ case BPF_S_LD_B_IND:
k = X + fentry->k;
goto load_b;
- case BPF_LDX|BPF_B|BPF_MSH:
+ case BPF_S_LDX_B_MSH:
ptr = load_pointer(skb, fentry->k, 1, &tmp);
if (ptr != NULL) {
X = (*(u8 *)ptr & 0xf) << 2;
continue;
}
return 0;
- case BPF_LD|BPF_IMM:
+ case BPF_S_LD_IMM:
A = fentry->k;
continue;
- case BPF_LDX|BPF_IMM:
+ case BPF_S_LDX_IMM:
X = fentry->k;
continue;
- case BPF_LD|BPF_MEM:
+ case BPF_S_LD_MEM:
A = mem[fentry->k];
continue;
- case BPF_LDX|BPF_MEM:
+ case BPF_S_LDX_MEM:
X = mem[fentry->k];
continue;
- case BPF_MISC|BPF_TAX:
+ case BPF_S_MISC_TAX:
X = A;
continue;
- case BPF_MISC|BPF_TXA:
+ case BPF_S_MISC_TXA:
A = X;
continue;
- case BPF_RET|BPF_K:
+ case BPF_S_RET_K:
return fentry->k;
- case BPF_RET|BPF_A:
+ case BPF_S_RET_A:
return A;
- case BPF_ST:
+ case BPF_S_ST:
mem[fentry->k] = A;
continue;
- case BPF_STX:
+ case BPF_S_STX:
mem[fentry->k] = X;
continue;
default:
@@ -390,53 +390,128 @@ int sk_chk_filter(struct sock_filter *filter, int flen)
/* Only allow valid instructions */
switch (ftest->code) {
case BPF_ALU|BPF_ADD|BPF_K:
+ ftest->code = BPF_S_ALU_ADD_K;
+ break;
case BPF_ALU|BPF_ADD|BPF_X:
+ ftest->code = BPF_S_ALU_ADD_X;
+ break;
case BPF_ALU|BPF_SUB|BPF_K:
+ ftest->code = BPF_S_ALU_SUB_K;
+ break;
case BPF_ALU|BPF_SUB|BPF_X:
+ ftest->code = BPF_S_ALU_SUB_X;
+ break;
case BPF_ALU|BPF_MUL|BPF_K:
+ ftest->code = BPF_S_ALU_MUL_K;
+ break;
case BPF_ALU|BPF_MUL|BPF_X:
+ ftest->code = BPF_S_ALU_MUL_X;
+ break;
case BPF_ALU|BPF_DIV|BPF_X:
+ ftest->code = BPF_S_ALU_DIV_X;
+ break;
case BPF_ALU|BPF_AND|BPF_K:
+ ftest->code = BPF_S_ALU_AND_K;
+ break;
case BPF_ALU|BPF_AND|BPF_X:
+ ftest->code = BPF_S_ALU_AND_X;
+ break;
case BPF_ALU|BPF_OR|BPF_K:
+ ftest->code = BPF_S_ALU_OR_K;
+ break;
case BPF_ALU|BPF_OR|BPF_X:
+ ftest->code = BPF_S_ALU_OR_X;
+ break;
case BPF_ALU|BPF_LSH|BPF_K:
+ ftest->code = BPF_S_ALU_LSH_K;
+ break;
case BPF_ALU|BPF_LSH|BPF_X:
+ ftest->code = BPF_S_ALU_LSH_X;
+ break;
case BPF_ALU|BPF_RSH|BPF_K:
+ ftest->code = BPF_S_ALU_RSH_K;
+ break;
case BPF_ALU|BPF_RSH|BPF_X:
+ ftest->code = BPF_S_ALU_RSH_X;
+ break;
case BPF_ALU|BPF_NEG:
+ ftest->code = BPF_S_ALU_NEG;
+ break;
case BPF_LD|BPF_W|BPF_ABS:
+ ftest->code = BPF_S_LD_W_ABS;
+ break;
case BPF_LD|BPF_H|BPF_ABS:
+ ftest->code = BPF_S_LD_H_ABS;
+ break;
case BPF_LD|BPF_B|BPF_ABS:
+ ftest->code = BPF_S_LD_B_ABS;
+ break;
case BPF_LD|BPF_W|BPF_LEN:
+ ftest->code = BPF_S_LD_W_LEN;
+ break;
case BPF_LD|BPF_W|BPF_IND:
+ ftest->code = BPF_S_LD_W_IND;
+ break;
case BPF_LD|BPF_H|BPF_IND:
+ ftest->code = BPF_S_LD_H_IND;
+ break;
case BPF_LD|BPF_B|BPF_IND:
+ ftest->code = BPF_S_LD_B_IND;
+ break;
case BPF_LD|BPF_IMM:
+ ftest->code = BPF_S_LD_IMM;
+ break;
case BPF_LDX|BPF_W|BPF_LEN:
+ ftest->code = BPF_S_LDX_W_LEN;
+ break;
case BPF_LDX|BPF_B|BPF_MSH:
+ ftest->code = BPF_S_LDX_B_MSH;
+ break;
case BPF_LDX|BPF_IMM:
+ ftest->code = BPF_S_LDX_IMM;
+ break;
case BPF_MISC|BPF_TAX:
+ ftest->code = BPF_S_MISC_TAX;
+ break;
case BPF_MISC|BPF_TXA:
+ ftest->code = BPF_S_MISC_TXA;
+ break;
case BPF_RET|BPF_K:
+ ftest->code = BPF_S_RET_K;
+ break;
case BPF_RET|BPF_A:
+ ftest->code = BPF_S_RET_A;
break;
/* Some instructions need special checks */
- case BPF_ALU|BPF_DIV|BPF_K:
/* check for division by zero */
+ case BPF_ALU|BPF_DIV|BPF_K:
if (ftest->k == 0)
return -EINVAL;
+ ftest->code = BPF_S_ALU_DIV_K;
break;
+ /* check for invalid memory addresses */
case BPF_LD|BPF_MEM:
+ if (ftest->k >= BPF_MEMWORDS)
+ return -EINVAL;
+ ftest->code = BPF_S_LD_MEM;
+ break;
case BPF_LDX|BPF_MEM:
+ if (ftest->k >= BPF_MEMWORDS)
+ return -EINVAL;
+ ftest->code = BPF_S_LDX_MEM;
+ break;
case BPF_ST:
+ if (ftest->k >= BPF_MEMWORDS)
+ return -EINVAL;
+ ftest->code = BPF_S_ST;
+ break;
case BPF_STX:
- /* check for invalid memory addresses */
if (ftest->k >= BPF_MEMWORDS)
return -EINVAL;
+ ftest->code = BPF_S_STX;
break;
case BPF_JMP|BPF_JA:
@@ -447,28 +522,63 @@ int sk_chk_filter(struct sock_filter *filter, int flen)
*/
if (ftest->k >= (unsigned)(flen-pc-1))
return -EINVAL;
+ ftest->code = BPF_S_JMP_JA;
break;
case BPF_JMP|BPF_JEQ|BPF_K:
+ ftest->code = BPF_S_JMP_JEQ_K;
+ break;
case BPF_JMP|BPF_JEQ|BPF_X:
+ ftest->code = BPF_S_JMP_JEQ_X;
+ break;
case BPF_JMP|BPF_JGE|BPF_K:
+ ftest->code = BPF_S_JMP_JGE_K;
+ break;
case BPF_JMP|BPF_JGE|BPF_X:
+ ftest->code = BPF_S_JMP_JGE_X;
+ break;
case BPF_JMP|BPF_JGT|BPF_K:
+ ftest->code = BPF_S_JMP_JGT_K;
+ break;
case BPF_JMP|BPF_JGT|BPF_X:
+ ftest->code = BPF_S_JMP_JGT_X;
+ break;
case BPF_JMP|BPF_JSET|BPF_K:
+ ftest->code = BPF_S_JMP_JSET_K;
+ break;
case BPF_JMP|BPF_JSET|BPF_X:
+ ftest->code = BPF_S_JMP_JSET_X;
+ break;
+
+ default:
+ return -EINVAL;
+ }
+
/* for conditionals both must be safe */
+ switch (ftest->code) {
+ case BPF_S_JMP_JEQ_K:
+ case BPF_S_JMP_JEQ_X:
+ case BPF_S_JMP_JGE_K:
+ case BPF_S_JMP_JGE_X:
+ case BPF_S_JMP_JGT_K:
+ case BPF_S_JMP_JGT_X:
+ case BPF_S_JMP_JSET_X:
+ case BPF_S_JMP_JSET_K:
if (pc + ftest->jt + 1 >= flen ||
pc + ftest->jf + 1 >= flen)
return -EINVAL;
- break;
+ }
+ }
+ /* last instruction must be a RET code */
+ switch (filter[flen - 1].code) {
+ case BPF_S_RET_K:
+ case BPF_S_RET_A:
+ return 0;
+ break;
default:
return -EINVAL;
}
- }
-
- return (BPF_CLASS(filter[flen - 1].code) == BPF_RET) ? 0 : -EINVAL;
}
EXPORT_SYMBOL(sk_chk_filter);
--
1.6.6.196.g1f735.dirty
next reply other threads:[~2010-06-20 3:15 UTC|newest]
Thread overview: 9+ messages / expand[flat|nested] mbox.gz Atom feed top
2010-06-20 3:05 Hagen Paul Pfeifer [this message]
2010-06-20 5:16 ` [PATCH] net: optimize Berkeley Packet Filter (BPF) processing Stephen Hemminger
2010-06-20 9:50 ` Hagen Paul Pfeifer
2010-06-20 18:49 ` Stephen Hemminger
2010-06-20 22:09 ` David Miller
2010-06-20 21:57 ` Hagen Paul Pfeifer
2010-06-21 2:15 ` Changli Gao
2010-06-21 7:57 ` Hagen Paul Pfeifer
2010-06-26 4:36 ` David Miller
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1277003136-5522-1-git-send-email-hagen@jauu.net \
--to=hagen@jauu.net \
--cc=davem@davemloft.net \
--cc=netdev@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.