* [PATCH net-next 1/2] bpf: make programs see skb->data == L2 for ingress and egress
@ 2015-06-04  5:07 Alexei Starovoitov
  2015-06-04  5:07 ` [PATCH net-next 2/2] bpf: allow programs to write to certain skb fields Alexei Starovoitov
  2015-06-04  8:11 ` [PATCH net-next 1/2] bpf: make programs see skb->data == L2 for ingress and egress David Miller
  0 siblings, 2 replies; 3+ messages in thread
From: Alexei Starovoitov @ 2015-06-04  5:07 UTC (permalink / raw)
  To: David S. Miller; +Cc: Eric Dumazet, Jamal Hadi Salim, Daniel Borkmann, netdev

eBPF programs attached to ingress and egress qdiscs see inconsistent skb->data:
for ingress the L2 header is already pulled, whereas for egress it is present.
This is known to program writers, who are currently forced to use the
BPF_LL_OFF workaround.
Since programs don't change skb internal pointers, it is safe to do the
pull/push right around the invocation of the program; earlier taps and
later pt->func() are not affected.
Multiple taps via packet_rcv() and tpacket_rcv() already do the same trick
around run_filter/BPF_PROG_RUN, even when skb_shared().

This fix finally allows programs to use optimized LD_ABS/IND instructions
without BPF_LL_OFF for higher performance.
tc ingress + cls_bpf + samples/bpf/tcbpf1_kern.o
       w/o JIT   w/JIT
before  20.5     23.6 Mpps
after   21.8     26.6 Mpps

Old programs with BPF_LL_OFF will still work as-is.

We can now undo most of the earlier workaround commit:
a166151cbe33 ("bpf: fix bpf helpers to use skb->mac_header relative offsets")

Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
---

Earlier versions tried to do too much by making ingress and egress qdiscs
consistent for all classifiers and actions, or had too large a scope for the push/pull:
v1: http://thread.gmane.org/gmane.linux.network/358168/focus=358168
v2: http://thread.gmane.org/gmane.linux.network/358524/focus=358532
v3: http://thread.gmane.org/gmane.linux.network/358733/focus=358734
v4: http://thread.gmane.org/gmane.linux.network/359129/focus=359694

skb->data will still be different for all non-bpf classifiers/actions.

This fix will still allow us to explore further optimizations like
moving skb_pull() from eth_type_trans() into netif_receive_skb() in the future.
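
For program authors, the practical difference can be sketched with a minimal
cls_bpf-style program (a sketch only, modeled on samples/bpf/tcbpf1_kern.c;
the SEC() macro and load_byte() helper are assumed from samples/bpf, and the
return-value handling follows cls_bpf; this is not part of the patch):

  /* offsets are now L2-relative on both ingress and egress,
   * so BPF_LL_OFF is no longer needed
   */
  SEC("classifier")
  int cls_example(struct __sk_buff *skb)
  {
          /* before: load_byte(skb, BPF_LL_OFF + ETH_HLEN + offsetof(struct iphdr, protocol)) */
          __u8 proto = load_byte(skb, ETH_HLEN + offsetof(struct iphdr, protocol));

          /* match TCP packets with the prog's default classid, skip others */
          return proto == IPPROTO_TCP ? -1 : 0;
  }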

Here is how the ingress call chain looks:

netif_receive_skb,          // likely skb->users == 1
  deliver_skb
    packet_rcv              // skb->users == 2
      orig_skb_data = skb->data
      push l2
      res = BPF_PROG_RUN
      if (!res) {
        skb->data = orig_skb_data
        consume_skb(skb)    // skb->users == 1
        goto out
      }

      skb2 = skb_clone(skb)
      skb->data = orig_skb_data
      consume_skb(skb)      // skb->users == 1
      __skb_queue_tail(skb2)

  deliver_skb
    tpacket_rcv             // skb->users == 2
      orig_skb_data = skb->data
      push l2
      res = BPF_PROG_RUN
      if (!res) {
        skb->data = orig_skb_data
        kfree_skb(skb)      // skb->users == 1
        goto out
      }

      if (...) {
        skb2 = skb_clone(skb)
        __skb_queue_tail(skb2)
      }
      skb_copy_bits(skb)
      skb->data = orig_skb_data
      kfree_skb(skb)        // skb->users == 1

  tc_classify
     cls_u32 and other classifiers don't touch skb
       actions like mirred do clone before redirect, etc.

     cls_bpf               // skb->users == 1
       push l2
       res = BPF_PROG_RUN
       pull l2
       actions             // still see skb->data at L3

     cls_xxx               // still see skb->data at L3
       actions

  netfilter
  vlan_do_receive
  bridge

  deliver_skb
    mpls_forward and other ptype specific taps

  ip_rcv                   // skb->users == 1

 net/core/filter.c         |   27 +++------------------------
 net/sched/act_bpf.c       |    9 ++++++++-
 net/sched/cls_bpf.c       |   12 +++++++++++-
 samples/bpf/tcbpf1_kern.c |    8 ++++----
 4 files changed, 26 insertions(+), 30 deletions(-)

diff --git a/net/core/filter.c b/net/core/filter.c
index 64c121c09655..e76b8ec1b0ac 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1239,21 +1239,6 @@ int sk_attach_bpf(u32 ufd, struct sock *sk)
 	return 0;
 }
 
-/**
- *	bpf_skb_clone_not_writable - is the header of a clone not writable
- *	@skb: buffer to check
- *	@len: length up to which to write, can be negative
- *
- *	Returns true if modifying the header part of the cloned buffer
- *	does require the data to be copied. I.e. this version works with
- *	negative lengths needed for eBPF case!
- */
-static bool bpf_skb_clone_unwritable(const struct sk_buff *skb, int len)
-{
-	return skb_header_cloned(skb) ||
-	       (int) skb_headroom(skb) + len > skb->hdr_len;
-}
-
 #define BPF_RECOMPUTE_CSUM(flags)	((flags) & 1)
 
 static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags)
@@ -1276,9 +1261,8 @@ static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags)
 	if (unlikely((u32) offset > 0xffff || len > sizeof(buf)))
 		return -EFAULT;
 
-	offset -= skb->data - skb_mac_header(skb);
 	if (unlikely(skb_cloned(skb) &&
-		     bpf_skb_clone_unwritable(skb, offset + len)))
+		     !skb_clone_writable(skb, offset + len)))
 		return -EFAULT;
 
 	ptr = skb_header_pointer(skb, offset, len, buf);
@@ -1322,9 +1306,8 @@ static u64 bpf_l3_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags)
 	if (unlikely((u32) offset > 0xffff))
 		return -EFAULT;
 
-	offset -= skb->data - skb_mac_header(skb);
 	if (unlikely(skb_cloned(skb) &&
-		     bpf_skb_clone_unwritable(skb, offset + sizeof(sum))))
+		     !skb_clone_writable(skb, offset + sizeof(sum))))
 		return -EFAULT;
 
 	ptr = skb_header_pointer(skb, offset, sizeof(sum), &sum);
@@ -1370,9 +1353,8 @@ static u64 bpf_l4_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags)
 	if (unlikely((u32) offset > 0xffff))
 		return -EFAULT;
 
-	offset -= skb->data - skb_mac_header(skb);
 	if (unlikely(skb_cloned(skb) &&
-		     bpf_skb_clone_unwritable(skb, offset + sizeof(sum))))
+		     !skb_clone_writable(skb, offset + sizeof(sum))))
 		return -EFAULT;
 
 	ptr = skb_header_pointer(skb, offset, sizeof(sum), &sum);
@@ -1426,9 +1408,6 @@ static u64 bpf_clone_redirect(u64 r1, u64 ifindex, u64 flags, u64 r4, u64 r5)
 	if (unlikely(!skb2))
 		return -ENOMEM;
 
-	if (G_TC_AT(skb2->tc_verd) & AT_INGRESS)
-		skb_push(skb2, skb2->mac_len);
-
 	if (BPF_IS_REDIRECT_INGRESS(flags))
 		return dev_forward_skb(dev, skb2);
 
diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
index dc6a2d324bd8..b32367deab77 100644
--- a/net/sched/act_bpf.c
+++ b/net/sched/act_bpf.c
@@ -37,6 +37,7 @@ static int tcf_bpf(struct sk_buff *skb, const struct tc_action *act,
 {
 	struct tcf_bpf *prog = act->priv;
 	int action, filter_res;
+	u32 at_ingress = G_TC_AT(skb->tc_verd) & AT_INGRESS;
 
 	if (unlikely(!skb_mac_header_was_set(skb)))
 		return TC_ACT_UNSPEC;
@@ -48,7 +49,13 @@ static int tcf_bpf(struct sk_buff *skb, const struct tc_action *act,
 
 	/* Needed here for accessing maps. */
 	rcu_read_lock();
-	filter_res = BPF_PROG_RUN(prog->filter, skb);
+	if (at_ingress) {
+		__skb_push(skb, skb->mac_len);
+		filter_res = BPF_PROG_RUN(prog->filter, skb);
+		__skb_pull(skb, skb->mac_len);
+	} else {
+		filter_res = BPF_PROG_RUN(prog->filter, skb);
+	}
 	rcu_read_unlock();
 
 	/* A BPF program may overwrite the default action opcode.
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index 91bd9c19471d..0e5e747c73f3 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -64,6 +64,7 @@ static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
 {
 	struct cls_bpf_head *head = rcu_dereference_bh(tp->root);
 	struct cls_bpf_prog *prog;
+	u32 at_ingress = G_TC_AT(skb->tc_verd) & AT_INGRESS;
 	int ret = -1;
 
 	if (unlikely(!skb_mac_header_was_set(skb)))
@@ -72,7 +73,16 @@ static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
 	/* Needed here for accessing maps. */
 	rcu_read_lock();
 	list_for_each_entry_rcu(prog, &head->plist, link) {
-		int filter_res = BPF_PROG_RUN(prog->filter, skb);
+		int filter_res;
+
+		if (at_ingress) {
+			/* It is safe to push/pull even if skb_shared() */
+			__skb_push(skb, skb->mac_len);
+			filter_res = BPF_PROG_RUN(prog->filter, skb);
+			__skb_pull(skb, skb->mac_len);
+		} else {
+			filter_res = BPF_PROG_RUN(prog->filter, skb);
+		}
 
 		if (filter_res == 0)
 			continue;
diff --git a/samples/bpf/tcbpf1_kern.c b/samples/bpf/tcbpf1_kern.c
index 7c27710f8296..9bfb2eb34563 100644
--- a/samples/bpf/tcbpf1_kern.c
+++ b/samples/bpf/tcbpf1_kern.c
@@ -21,7 +21,7 @@ static inline void set_dst_mac(struct __sk_buff *skb, char *mac)
 
 static inline void set_ip_tos(struct __sk_buff *skb, __u8 new_tos)
 {
-	__u8 old_tos = load_byte(skb, BPF_LL_OFF + TOS_OFF);
+	__u8 old_tos = load_byte(skb, TOS_OFF);
 
 	bpf_l3_csum_replace(skb, IP_CSUM_OFF, htons(old_tos), htons(new_tos), 2);
 	bpf_skb_store_bytes(skb, TOS_OFF, &new_tos, sizeof(new_tos), 0);
@@ -34,7 +34,7 @@ static inline void set_ip_tos(struct __sk_buff *skb, __u8 new_tos)
 
 static inline void set_tcp_ip_src(struct __sk_buff *skb, __u32 new_ip)
 {
-	__u32 old_ip = _htonl(load_word(skb, BPF_LL_OFF + IP_SRC_OFF));
+	__u32 old_ip = _htonl(load_word(skb, IP_SRC_OFF));
 
 	bpf_l4_csum_replace(skb, TCP_CSUM_OFF, old_ip, new_ip, IS_PSEUDO | sizeof(new_ip));
 	bpf_l3_csum_replace(skb, IP_CSUM_OFF, old_ip, new_ip, sizeof(new_ip));
@@ -44,7 +44,7 @@ static inline void set_tcp_ip_src(struct __sk_buff *skb, __u32 new_ip)
 #define TCP_DPORT_OFF (ETH_HLEN + sizeof(struct iphdr) + offsetof(struct tcphdr, dest))
 static inline void set_tcp_dest_port(struct __sk_buff *skb, __u16 new_port)
 {
-	__u16 old_port = htons(load_half(skb, BPF_LL_OFF + TCP_DPORT_OFF));
+	__u16 old_port = htons(load_half(skb, TCP_DPORT_OFF));
 
 	bpf_l4_csum_replace(skb, TCP_CSUM_OFF, old_port, new_port, sizeof(new_port));
 	bpf_skb_store_bytes(skb, TCP_DPORT_OFF, &new_port, sizeof(new_port), 0);
@@ -53,7 +53,7 @@ static inline void set_tcp_dest_port(struct __sk_buff *skb, __u16 new_port)
 SEC("classifier")
 int bpf_prog1(struct __sk_buff *skb)
 {
-	__u8 proto = load_byte(skb, BPF_LL_OFF + ETH_HLEN + offsetof(struct iphdr, protocol));
+	__u8 proto = load_byte(skb, ETH_HLEN + offsetof(struct iphdr, protocol));
 	long *value;
 
 	if (proto == IPPROTO_TCP) {
-- 
1.7.9.5

* [PATCH net-next 2/2] bpf: allow programs to write to certain skb fields
  2015-06-04  5:07 [PATCH net-next 1/2] bpf: make programs see skb->data == L2 for ingress and egress Alexei Starovoitov
@ 2015-06-04  5:07 ` Alexei Starovoitov
  2015-06-04  8:11 ` [PATCH net-next 1/2] bpf: make programs see skb->data == L2 for ingress and egress David Miller
  1 sibling, 0 replies; 3+ messages in thread
From: Alexei Starovoitov @ 2015-06-04  5:07 UTC (permalink / raw)
  To: David S. Miller; +Cc: Eric Dumazet, Jamal Hadi Salim, Daniel Borkmann, netdev

Allow programs to read/write the skb->mark and tc_index fields and
((struct qdisc_skb_cb *)cb)->data.

mark and tc_index are generically useful in TC.
cb[0]-cb[4] are primarily used to pass arguments from one
program to another called via bpf_tail_call(), as can be
seen in the sockex3_kern.c example.

All fields of 'struct __sk_buff' are readable by socket and tc_cls_act progs.
mark and tc_index are writeable from tc_cls_act progs only.
cb[0]-cb[4] are writeable by both socket and tc_cls_act progs.

Add verifier tests and improve sample code.

Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
---
This patch is independent of the previous push/pull L2 patch, but
both modify net/core/filter.c, so they are sent together.
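
To illustrate the cb[] usage mentioned above, here is a rough sketch in the
style of samples/bpf/sockex3_kern.c (the jmp_table map, PARSE_IP index and the
SEC()/PROG()/load_byte()/bpf_tail_call() macros are assumed from that sample;
this is not part of the patch):

  SEC("socket/0")
  int main_prog(struct __sk_buff *skb)
  {
          skb->cb[0] = ETH_HLEN;            /* pass nhoff to the tail-called program */
          bpf_tail_call(skb, &jmp_table, PARSE_IP);
          return 0;
  }

  PROG(PARSE_IP)(struct __sk_buff *skb)
  {
          __u32 nhoff = skb->cb[0];         /* argument passed via cb[0] */

          /* cb[] is also writeable here, so results can be handed onwards */
          skb->cb[1] = load_byte(skb, nhoff + offsetof(struct iphdr, protocol));
          return 0;
  }

A tc_cls_act program could additionally write skb->mark or skb->tc_index;
the verifier rejects such writes from socket programs (see the test cases below).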

 include/linux/bpf.h         |    3 +-
 include/uapi/linux/bpf.h    |    2 +
 kernel/bpf/verifier.c       |   37 ++++++++++++-----
 net/core/filter.c           |   93 +++++++++++++++++++++++++++++++++++++------
 samples/bpf/sockex3_kern.c  |   35 +++++-----------
 samples/bpf/test_verifier.c |   84 +++++++++++++++++++++++++++++++++++++-
 6 files changed, 206 insertions(+), 48 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index ca854e5bb2f7..2235aee8096a 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -105,7 +105,8 @@ struct bpf_verifier_ops {
 	 */
 	bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
 
-	u32 (*convert_ctx_access)(int dst_reg, int src_reg, int ctx_off,
+	u32 (*convert_ctx_access)(enum bpf_access_type type, int dst_reg,
+				  int src_reg, int ctx_off,
 				  struct bpf_insn *insn);
 };
 
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 42aa19abab86..602f05b7a275 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -248,6 +248,8 @@ struct __sk_buff {
 	__u32 priority;
 	__u32 ingress_ifindex;
 	__u32 ifindex;
+	__u32 tc_index;
+	__u32 cb[5];
 };
 
 #endif /* _UAPI__LINUX_BPF_H__ */
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index cfd9a40b9a5a..039d866fd36a 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1692,6 +1692,8 @@ static int do_check(struct verifier_env *env)
 			}
 
 		} else if (class == BPF_STX) {
+			enum bpf_reg_type dst_reg_type;
+
 			if (BPF_MODE(insn->code) == BPF_XADD) {
 				err = check_xadd(env, insn);
 				if (err)
@@ -1700,11 +1702,6 @@ static int do_check(struct verifier_env *env)
 				continue;
 			}
 
-			if (BPF_MODE(insn->code) != BPF_MEM ||
-			    insn->imm != 0) {
-				verbose("BPF_STX uses reserved fields\n");
-				return -EINVAL;
-			}
 			/* check src1 operand */
 			err = check_reg_arg(regs, insn->src_reg, SRC_OP);
 			if (err)
@@ -1714,6 +1711,8 @@ static int do_check(struct verifier_env *env)
 			if (err)
 				return err;
 
+			dst_reg_type = regs[insn->dst_reg].type;
+
 			/* check that memory (dst_reg + off) is writeable */
 			err = check_mem_access(env, insn->dst_reg, insn->off,
 					       BPF_SIZE(insn->code), BPF_WRITE,
@@ -1721,6 +1720,15 @@ static int do_check(struct verifier_env *env)
 			if (err)
 				return err;
 
+			if (insn->imm == 0) {
+				insn->imm = dst_reg_type;
+			} else if (dst_reg_type != insn->imm &&
+				   (dst_reg_type == PTR_TO_CTX ||
+				    insn->imm == PTR_TO_CTX)) {
+				verbose("same insn cannot be used with different pointers\n");
+				return -EINVAL;
+			}
+
 		} else if (class == BPF_ST) {
 			if (BPF_MODE(insn->code) != BPF_MEM ||
 			    insn->src_reg != BPF_REG_0) {
@@ -1839,12 +1847,18 @@ static int replace_map_fd_with_map_ptr(struct verifier_env *env)
 
 	for (i = 0; i < insn_cnt; i++, insn++) {
 		if (BPF_CLASS(insn->code) == BPF_LDX &&
-		    (BPF_MODE(insn->code) != BPF_MEM ||
-		     insn->imm != 0)) {
+		    (BPF_MODE(insn->code) != BPF_MEM || insn->imm != 0)) {
 			verbose("BPF_LDX uses reserved fields\n");
 			return -EINVAL;
 		}
 
+		if (BPF_CLASS(insn->code) == BPF_STX &&
+		    ((BPF_MODE(insn->code) != BPF_MEM &&
+		      BPF_MODE(insn->code) != BPF_XADD) || insn->imm != 0)) {
+			verbose("BPF_STX uses reserved fields\n");
+			return -EINVAL;
+		}
+
 		if (insn[0].code == (BPF_LD | BPF_IMM | BPF_DW)) {
 			struct bpf_map *map;
 			struct fd f;
@@ -1967,12 +1981,17 @@ static int convert_ctx_accesses(struct verifier_env *env)
 	struct bpf_prog *new_prog;
 	u32 cnt;
 	int i;
+	enum bpf_access_type type;
 
 	if (!env->prog->aux->ops->convert_ctx_access)
 		return 0;
 
 	for (i = 0; i < insn_cnt; i++, insn++) {
-		if (insn->code != (BPF_LDX | BPF_MEM | BPF_W))
+		if (insn->code == (BPF_LDX | BPF_MEM | BPF_W))
+			type = BPF_READ;
+		else if (insn->code == (BPF_STX | BPF_MEM | BPF_W))
+			type = BPF_WRITE;
+		else
 			continue;
 
 		if (insn->imm != PTR_TO_CTX) {
@@ -1982,7 +2001,7 @@ static int convert_ctx_accesses(struct verifier_env *env)
 		}
 
 		cnt = env->prog->aux->ops->
-			convert_ctx_access(insn->dst_reg, insn->src_reg,
+			convert_ctx_access(type, insn->dst_reg, insn->src_reg,
 					   insn->off, insn_buf);
 		if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) {
 			verbose("bpf verifier is misconfigured\n");
diff --git a/net/core/filter.c b/net/core/filter.c
index e76b8ec1b0ac..d271c06bf01f 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1464,13 +1464,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id)
 	}
 }
 
-static bool sk_filter_is_valid_access(int off, int size,
-				      enum bpf_access_type type)
+static bool __is_valid_access(int off, int size, enum bpf_access_type type)
 {
-	/* only read is allowed */
-	if (type != BPF_READ)
-		return false;
-
 	/* check bounds */
 	if (off < 0 || off >= sizeof(struct __sk_buff))
 		return false;
@@ -1486,8 +1481,42 @@ static bool sk_filter_is_valid_access(int off, int size,
 	return true;
 }
 
-static u32 sk_filter_convert_ctx_access(int dst_reg, int src_reg, int ctx_off,
-					struct bpf_insn *insn_buf)
+static bool sk_filter_is_valid_access(int off, int size,
+				      enum bpf_access_type type)
+{
+	if (type == BPF_WRITE) {
+		switch (off) {
+		case offsetof(struct __sk_buff, cb[0]) ...
+			offsetof(struct __sk_buff, cb[4]):
+			break;
+		default:
+			return false;
+		}
+	}
+
+	return __is_valid_access(off, size, type);
+}
+
+static bool tc_cls_act_is_valid_access(int off, int size,
+				       enum bpf_access_type type)
+{
+	if (type == BPF_WRITE) {
+		switch (off) {
+		case offsetof(struct __sk_buff, mark):
+		case offsetof(struct __sk_buff, tc_index):
+		case offsetof(struct __sk_buff, cb[0]) ...
+			offsetof(struct __sk_buff, cb[4]):
+			break;
+		default:
+			return false;
+		}
+	}
+	return __is_valid_access(off, size, type);
+}
+
+static u32 bpf_net_convert_ctx_access(enum bpf_access_type type, int dst_reg,
+				      int src_reg, int ctx_off,
+				      struct bpf_insn *insn_buf)
 {
 	struct bpf_insn *insn = insn_buf;
 
@@ -1539,7 +1568,15 @@ static u32 sk_filter_convert_ctx_access(int dst_reg, int src_reg, int ctx_off,
 		break;
 
 	case offsetof(struct __sk_buff, mark):
-		return convert_skb_access(SKF_AD_MARK, dst_reg, src_reg, insn);
+		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4);
+
+		if (type == BPF_WRITE)
+			*insn++ = BPF_STX_MEM(BPF_W, dst_reg, src_reg,
+					      offsetof(struct sk_buff, mark));
+		else
+			*insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg,
+					      offsetof(struct sk_buff, mark));
+		break;
 
 	case offsetof(struct __sk_buff, pkt_type):
 		return convert_skb_access(SKF_AD_PKTTYPE, dst_reg, src_reg, insn);
@@ -1554,6 +1591,38 @@ static u32 sk_filter_convert_ctx_access(int dst_reg, int src_reg, int ctx_off,
 	case offsetof(struct __sk_buff, vlan_tci):
 		return convert_skb_access(SKF_AD_VLAN_TAG,
 					  dst_reg, src_reg, insn);
+
+	case offsetof(struct __sk_buff, cb[0]) ...
+		offsetof(struct __sk_buff, cb[4]):
+		BUILD_BUG_ON(FIELD_SIZEOF(struct qdisc_skb_cb, data) < 20);
+
+		ctx_off -= offsetof(struct __sk_buff, cb[0]);
+		ctx_off += offsetof(struct sk_buff, cb);
+		ctx_off += offsetof(struct qdisc_skb_cb, data);
+		if (type == BPF_WRITE)
+			*insn++ = BPF_STX_MEM(BPF_W, dst_reg, src_reg, ctx_off);
+		else
+			*insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg, ctx_off);
+		break;
+
+	case offsetof(struct __sk_buff, tc_index):
+#ifdef CONFIG_NET_SCHED
+		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, tc_index) != 2);
+
+		if (type == BPF_WRITE)
+			*insn++ = BPF_STX_MEM(BPF_H, dst_reg, src_reg,
+					      offsetof(struct sk_buff, tc_index));
+		else
+			*insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg,
+					      offsetof(struct sk_buff, tc_index));
+		break;
+#else
+		if (type == BPF_WRITE)
+			*insn++ = BPF_MOV64_REG(dst_reg, dst_reg);
+		else
+			*insn++ = BPF_MOV64_IMM(dst_reg, 0);
+		break;
+#endif
 	}
 
 	return insn - insn_buf;
@@ -1562,13 +1631,13 @@ static u32 sk_filter_convert_ctx_access(int dst_reg, int src_reg, int ctx_off,
 static const struct bpf_verifier_ops sk_filter_ops = {
 	.get_func_proto = sk_filter_func_proto,
 	.is_valid_access = sk_filter_is_valid_access,
-	.convert_ctx_access = sk_filter_convert_ctx_access,
+	.convert_ctx_access = bpf_net_convert_ctx_access,
 };
 
 static const struct bpf_verifier_ops tc_cls_act_ops = {
 	.get_func_proto = tc_cls_act_func_proto,
-	.is_valid_access = sk_filter_is_valid_access,
-	.convert_ctx_access = sk_filter_convert_ctx_access,
+	.is_valid_access = tc_cls_act_is_valid_access,
+	.convert_ctx_access = bpf_net_convert_ctx_access,
 };
 
 static struct bpf_prog_type_list sk_filter_type __read_mostly = {
diff --git a/samples/bpf/sockex3_kern.c b/samples/bpf/sockex3_kern.c
index 2625b987944f..41ae2fd21b13 100644
--- a/samples/bpf/sockex3_kern.c
+++ b/samples/bpf/sockex3_kern.c
@@ -89,7 +89,6 @@ static inline __u32 ipv6_addr_hash(struct __sk_buff *ctx, __u64 off)
 
 struct globals {
 	struct flow_keys flow;
-	__u32 nhoff;
 };
 
 struct bpf_map_def SEC("maps") percpu_map = {
@@ -139,7 +138,7 @@ static void update_stats(struct __sk_buff *skb, struct globals *g)
 static __always_inline void parse_ip_proto(struct __sk_buff *skb,
 					   struct globals *g, __u32 ip_proto)
 {
-	__u32 nhoff = g->nhoff;
+	__u32 nhoff = skb->cb[0];
 	int poff;
 
 	switch (ip_proto) {
@@ -165,7 +164,7 @@ static __always_inline void parse_ip_proto(struct __sk_buff *skb,
 		if (gre_flags & GRE_SEQ)
 			nhoff += 4;
 
-		g->nhoff = nhoff;
+		skb->cb[0] = nhoff;
 		parse_eth_proto(skb, gre_proto);
 		break;
 	}
@@ -195,7 +194,7 @@ PROG(PARSE_IP)(struct __sk_buff *skb)
 	if (!g)
 		return 0;
 
-	nhoff = g->nhoff;
+	nhoff = skb->cb[0];
 
 	if (unlikely(ip_is_fragment(skb, nhoff)))
 		return 0;
@@ -210,7 +209,7 @@ PROG(PARSE_IP)(struct __sk_buff *skb)
 	verlen = load_byte(skb, nhoff + 0/*offsetof(struct iphdr, ihl)*/);
 	nhoff += (verlen & 0xF) << 2;
 
-	g->nhoff = nhoff;
+	skb->cb[0] = nhoff;
 	parse_ip_proto(skb, g, ip_proto);
 	return 0;
 }
@@ -223,7 +222,7 @@ PROG(PARSE_IPV6)(struct __sk_buff *skb)
 	if (!g)
 		return 0;
 
-	nhoff = g->nhoff;
+	nhoff = skb->cb[0];
 
 	ip_proto = load_byte(skb,
 			     nhoff + offsetof(struct ipv6hdr, nexthdr));
@@ -233,25 +232,21 @@ PROG(PARSE_IPV6)(struct __sk_buff *skb)
 				     nhoff + offsetof(struct ipv6hdr, daddr));
 	nhoff += sizeof(struct ipv6hdr);
 
-	g->nhoff = nhoff;
+	skb->cb[0] = nhoff;
 	parse_ip_proto(skb, g, ip_proto);
 	return 0;
 }
 
 PROG(PARSE_VLAN)(struct __sk_buff *skb)
 {
-	struct globals *g = this_cpu_globals();
 	__u32 nhoff, proto;
 
-	if (!g)
-		return 0;
-
-	nhoff = g->nhoff;
+	nhoff = skb->cb[0];
 
 	proto = load_half(skb, nhoff + offsetof(struct vlan_hdr,
 						h_vlan_encapsulated_proto));
 	nhoff += sizeof(struct vlan_hdr);
-	g->nhoff = nhoff;
+	skb->cb[0] = nhoff;
 
 	parse_eth_proto(skb, proto);
 
@@ -260,17 +255,13 @@ PROG(PARSE_VLAN)(struct __sk_buff *skb)
 
 PROG(PARSE_MPLS)(struct __sk_buff *skb)
 {
-	struct globals *g = this_cpu_globals();
 	__u32 nhoff, label;
 
-	if (!g)
-		return 0;
-
-	nhoff = g->nhoff;
+	nhoff = skb->cb[0];
 
 	label = load_word(skb, nhoff);
 	nhoff += sizeof(struct mpls_label);
-	g->nhoff = nhoff;
+	skb->cb[0] = nhoff;
 
 	if (label & MPLS_LS_S_MASK) {
 		__u8 verlen = load_byte(skb, nhoff);
@@ -288,14 +279,10 @@ PROG(PARSE_MPLS)(struct __sk_buff *skb)
 SEC("socket/0")
 int main_prog(struct __sk_buff *skb)
 {
-	struct globals *g = this_cpu_globals();
 	__u32 nhoff = ETH_HLEN;
 	__u32 proto = load_half(skb, 12);
 
-	if (!g)
-		return 0;
-
-	g->nhoff = nhoff;
+	skb->cb[0] = nhoff;
 	parse_eth_proto(skb, proto);
 	return 0;
 }
diff --git a/samples/bpf/test_verifier.c b/samples/bpf/test_verifier.c
index 12f3780af73f..693605997abc 100644
--- a/samples/bpf/test_verifier.c
+++ b/samples/bpf/test_verifier.c
@@ -29,6 +29,7 @@ struct bpf_test {
 		ACCEPT,
 		REJECT
 	} result;
+	enum bpf_prog_type prog_type;
 };
 
 static struct bpf_test tests[] = {
@@ -743,6 +744,84 @@ static struct bpf_test tests[] = {
 		.errstr = "different pointers",
 		.result = REJECT,
 	},
+	{
+		"check skb->mark is not writeable by sockets",
+		.insns = {
+			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_1,
+				    offsetof(struct __sk_buff, mark)),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "invalid bpf_context access",
+		.result = REJECT,
+	},
+	{
+		"check skb->tc_index is not writeable by sockets",
+		.insns = {
+			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_1,
+				    offsetof(struct __sk_buff, tc_index)),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "invalid bpf_context access",
+		.result = REJECT,
+	},
+	{
+		"check non-u32 access to cb",
+		.insns = {
+			BPF_STX_MEM(BPF_H, BPF_REG_1, BPF_REG_1,
+				    offsetof(struct __sk_buff, cb[0])),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "invalid bpf_context access",
+		.result = REJECT,
+	},
+	{
+		"check out of range skb->cb access",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, cb[60])),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "invalid bpf_context access",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_SCHED_ACT,
+	},
+	{
+		"write skb fields from socket prog",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, cb[4])),
+			BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 1),
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, mark)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, tc_index)),
+			BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 1),
+			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_1,
+				    offsetof(struct __sk_buff, cb[0])),
+			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_1,
+				    offsetof(struct __sk_buff, cb[2])),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+	},
+	{
+		"write skb fields from tc_cls_act prog",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, cb[0])),
+			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, mark)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, tc_index)),
+			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, tc_index)),
+			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, cb[3])),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	},
 };
 
 static int probe_filter_length(struct bpf_insn *fp)
@@ -775,6 +854,7 @@ static int test(void)
 
 	for (i = 0; i < ARRAY_SIZE(tests); i++) {
 		struct bpf_insn *prog = tests[i].insns;
+		int prog_type = tests[i].prog_type;
 		int prog_len = probe_filter_length(prog);
 		int *fixup = tests[i].fixup;
 		int map_fd = -1;
@@ -789,8 +869,8 @@ static int test(void)
 		}
 		printf("#%d %s ", i, tests[i].descr);
 
-		prog_fd = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, prog,
-					prog_len * sizeof(struct bpf_insn),
+		prog_fd = bpf_prog_load(prog_type ?: BPF_PROG_TYPE_SOCKET_FILTER,
+					prog, prog_len * sizeof(struct bpf_insn),
 					"GPL", 0);
 
 		if (tests[i].result == ACCEPT) {
-- 
1.7.9.5

* Re: [PATCH net-next 1/2] bpf: make programs see skb->data == L2 for ingress and egress
  2015-06-04  5:07 [PATCH net-next 1/2] bpf: make programs see skb->data == L2 for ingress and egress Alexei Starovoitov
  2015-06-04  5:07 ` [PATCH net-next 2/2] bpf: allow programs to write to certain skb fields Alexei Starovoitov
@ 2015-06-04  8:11 ` David Miller
  1 sibling, 0 replies; 3+ messages in thread
From: David Miller @ 2015-06-04  8:11 UTC (permalink / raw)
  To: ast; +Cc: edumazet, jhs, daniel, netdev

From: Alexei Starovoitov <ast@plumgrid.com>
Date: Wed,  3 Jun 2015 22:07:10 -0700

> +	u32 at_ingress = G_TC_AT(skb->tc_verd) & AT_INGRESS;

Please use a 'bool' for this.

> @@ -64,6 +64,7 @@ static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
>  {
>  	struct cls_bpf_head *head = rcu_dereference_bh(tp->root);
>  	struct cls_bpf_prog *prog;
> +	u32 at_ingress = G_TC_AT(skb->tc_verd) & AT_INGRESS;
>  	int ret = -1;
>  
>  	if (unlikely(!skb_mac_header_was_set(skb)))

Likewise.

Thank you.
