All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH bpf-next RFC 1/2] Organize MPLS headers
@ 2018-03-28 16:27 Shrijeet Mukherjee
  2018-03-28 16:27 ` [PATCH bpf-next RFC 2/2] Add MPLS label push/pop functions for EBPF Shrijeet Mukherjee
  2018-03-28 18:20 ` [PATCH bpf-next RFC 1/2] Organize MPLS headers David Ahern
  0 siblings, 2 replies; 5+ messages in thread
From: Shrijeet Mukherjee @ 2018-03-28 16:27 UTC (permalink / raw)
  To: daniel; +Cc: netdev, roopa, dsa, ast

From: Shrijeet Mukherjee <shrijeet@gmail.com>

Prepare shared headers for new MPLS label push/pop EBPF helpers

Signed-off-by: Shrijeet Mukherjee <shm@cumulusnetworks.com>
---
 include/net/mpls.h        |  1 +
 net/core/filter.c         |  2 ++
 net/mpls/af_mpls.c        |  8 ++++----
 net/mpls/internal.h       | 31 -------------------------------
 net/mpls/mpls_iptunnel.c  |  2 +-
 net/openvswitch/actions.c | 12 ++++++------
 6 files changed, 14 insertions(+), 42 deletions(-)

diff --git a/include/net/mpls.h b/include/net/mpls.h
index 1dbc669b770e..2583dbc689b8 100644
--- a/include/net/mpls.h
+++ b/include/net/mpls.h
@@ -16,6 +16,7 @@
 
 #include <linux/if_ether.h>
 #include <linux/netdevice.h>
+#include <uapi/linux/mpls.h>
 
 #define MPLS_HLEN 4
 
diff --git a/net/core/filter.c b/net/core/filter.c
index c86f03fd9ea5..00f62fafc788 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -29,6 +29,7 @@
 #include <linux/sock_diag.h>
 #include <linux/in.h>
 #include <linux/inet.h>
+#include <linux/mpls.h>
 #include <linux/netdevice.h>
 #include <linux/if_packet.h>
 #include <linux/if_arp.h>
@@ -56,6 +57,7 @@
 #include <net/sock_reuseport.h>
 #include <net/busy_poll.h>
 #include <net/tcp.h>
+#include <net/mpls.h>
 #include <linux/bpf_trace.h>
 
 /**
diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
index d4a89a8be013..4e05391b77f0 100644
--- a/net/mpls/af_mpls.c
+++ b/net/mpls/af_mpls.c
@@ -170,7 +170,7 @@ static u32 mpls_multipath_hash(struct mpls_route *rt, struct sk_buff *skb)
 			break;
 
 		/* Read and decode the current label */
-		hdr = mpls_hdr(skb) + label_index;
+		hdr = skb_mpls_hdr(skb) + label_index;
 		dec = mpls_entry_decode(hdr);
 
 		/* RFC6790 - reserved labels MUST NOT be used as keys
@@ -379,7 +379,7 @@ static int mpls_forward(struct sk_buff *skb, struct net_device *dev,
 		goto err;
 
 	/* Read and decode the label */
-	hdr = mpls_hdr(skb);
+	hdr = skb_mpls_hdr(skb);
 	dec = mpls_entry_decode(hdr);
 
 	rt = mpls_route_input_rcu(net, dec.label);
@@ -440,7 +440,7 @@ static int mpls_forward(struct sk_buff *skb, struct net_device *dev,
 		skb_push(skb, new_header_size);
 		skb_reset_network_header(skb);
 		/* Push the new labels */
-		hdr = mpls_hdr(skb);
+		hdr = skb_mpls_hdr(skb);
 		bos = dec.bos;
 		for (i = nh->nh_labels - 1; i >= 0; i--) {
 			hdr[i] = mpls_entry_encode(nh->nh_label[i],
@@ -2203,7 +2203,7 @@ static int mpls_getroute(struct sk_buff *in_skb, struct nlmsghdr *in_nlh,
 		skb_reset_network_header(skb);
 
 		/* Push new labels */
-		hdr = mpls_hdr(skb);
+		hdr = skb_mpls_hdr(skb);
 		bos = true;
 		for (i = n_labels - 1; i >= 0; i--) {
 			hdr[i] = mpls_entry_encode(labels[i],
diff --git a/net/mpls/internal.h b/net/mpls/internal.h
index 768a302879b4..cd93cb201fed 100644
--- a/net/mpls/internal.h
+++ b/net/mpls/internal.h
@@ -8,13 +8,6 @@
  */
 #define MAX_NEW_LABELS 30
 
-struct mpls_entry_decoded {
-	u32 label;
-	u8 ttl;
-	u8 tc;
-	u8 bos;
-};
-
 struct mpls_pcpu_stats {
 	struct mpls_link_stats	stats;
 	struct u64_stats_sync	syncp;
@@ -172,30 +165,6 @@ struct mpls_route { /* next hop label forwarding entry */
 
 #define endfor_nexthops(rt) }
 
-static inline struct mpls_shim_hdr mpls_entry_encode(u32 label, unsigned ttl, unsigned tc, bool bos)
-{
-	struct mpls_shim_hdr result;
-	result.label_stack_entry =
-		cpu_to_be32((label << MPLS_LS_LABEL_SHIFT) |
-			    (tc << MPLS_LS_TC_SHIFT) |
-			    (bos ? (1 << MPLS_LS_S_SHIFT) : 0) |
-			    (ttl << MPLS_LS_TTL_SHIFT));
-	return result;
-}
-
-static inline struct mpls_entry_decoded mpls_entry_decode(struct mpls_shim_hdr *hdr)
-{
-	struct mpls_entry_decoded result;
-	unsigned entry = be32_to_cpu(hdr->label_stack_entry);
-
-	result.label = (entry & MPLS_LS_LABEL_MASK) >> MPLS_LS_LABEL_SHIFT;
-	result.ttl = (entry & MPLS_LS_TTL_MASK) >> MPLS_LS_TTL_SHIFT;
-	result.tc =  (entry & MPLS_LS_TC_MASK) >> MPLS_LS_TC_SHIFT;
-	result.bos = (entry & MPLS_LS_S_MASK) >> MPLS_LS_S_SHIFT;
-
-	return result;
-}
-
 static inline struct mpls_dev *mpls_dev_get(const struct net_device *dev)
 {
 	return rcu_dereference_rtnl(dev->mpls_ptr);
diff --git a/net/mpls/mpls_iptunnel.c b/net/mpls/mpls_iptunnel.c
index 6e558a419f60..f8346dfba9c8 100644
--- a/net/mpls/mpls_iptunnel.c
+++ b/net/mpls/mpls_iptunnel.c
@@ -127,7 +127,7 @@ static int mpls_xmit(struct sk_buff *skb)
 	skb->protocol = htons(ETH_P_MPLS_UC);
 
 	/* Push the new labels */
-	hdr = mpls_hdr(skb);
+	hdr = skb_mpls_hdr(skb);
 	bos = true;
 	for (i = tun_encap_info->labels - 1; i >= 0; i--) {
 		hdr[i] = mpls_entry_encode(tun_encap_info->label[i],
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index 30a5df27116e..c6fdc632e6fb 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -205,7 +205,7 @@ static int push_mpls(struct sk_buff *skb, struct sw_flow_key *key,
 	skb_reset_mac_header(skb);
 	skb_set_network_header(skb, skb->mac_len);
 
-	new_mpls_lse = mpls_hdr(skb);
+	new_mpls_lse = skb_mpls_hdr(skb);
 	new_mpls_lse->label_stack_entry = mpls->mpls_lse;
 
 	skb_postpush_rcsum(skb, new_mpls_lse, MPLS_HLEN);
@@ -227,7 +227,7 @@ static int pop_mpls(struct sk_buff *skb, struct sw_flow_key *key,
 	if (unlikely(err))
 		return err;
 
-	skb_postpull_rcsum(skb, mpls_hdr(skb), MPLS_HLEN);
+	skb_postpull_rcsum(skb, skb_mpls_hdr(skb), MPLS_HLEN);
 
 	memmove(skb_mac_header(skb) + MPLS_HLEN, skb_mac_header(skb),
 		skb->mac_len);
@@ -239,10 +239,10 @@ static int pop_mpls(struct sk_buff *skb, struct sw_flow_key *key,
 	if (ovs_key_mac_proto(key) == MAC_PROTO_ETHERNET) {
 		struct ethhdr *hdr;
 
-		/* mpls_hdr() is used to locate the ethertype field correctly in the
-		 * presence of VLAN tags.
+		/* skb_mpls_hdr() is used to locate the ethertype field
+		 * correctly in the presence of VLAN tags.
 		 */
-		hdr = (struct ethhdr *)((void *)mpls_hdr(skb) - ETH_HLEN);
+		hdr = (struct ethhdr *)((void *)skb_mpls_hdr(skb) - ETH_HLEN);
 		update_ethertype(skb, hdr, ethertype);
 	}
 	if (eth_p_mpls(skb->protocol))
@@ -263,7 +263,7 @@ static int set_mpls(struct sk_buff *skb, struct sw_flow_key *flow_key,
 	if (unlikely(err))
 		return err;
 
-	stack = mpls_hdr(skb);
+	stack = skb_mpls_hdr(skb);
 	lse = OVS_MASKED(stack->label_stack_entry, *mpls_lse, *mask);
 	if (skb->ip_summed == CHECKSUM_COMPLETE) {
 		__be32 diff[] = { ~(stack->label_stack_entry), lse };
-- 
2.16.2.windows.1

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH bpf-next RFC 2/2] Add MPLS label push/pop functions for EBPF
  2018-03-28 16:27 [PATCH bpf-next RFC 1/2] Organize MPLS headers Shrijeet Mukherjee
@ 2018-03-28 16:27 ` Shrijeet Mukherjee
  2018-03-28 18:23   ` David Ahern
  2018-03-28 18:20 ` [PATCH bpf-next RFC 1/2] Organize MPLS headers David Ahern
  1 sibling, 1 reply; 5+ messages in thread
From: Shrijeet Mukherjee @ 2018-03-28 16:27 UTC (permalink / raw)
  To: daniel; +Cc: netdev, roopa, dsa, ast

From: Shrijeet Mukherjee <shrijeet@gmail.com>

Push: takes an array/size of labels and encaps
Pop : Takes a number and pops the top "n" labels

Signed-off-by: Shrijeet Mukherjee <shm@cumulusnetworks.com>

PS: Will be adding tests as well, refactoring my test into something
smaller.

---
 include/linux/bpf.h      |   2 +
 include/net/mpls.h       |  38 +++++++++++++-
 include/uapi/linux/bpf.h |  11 +++-
 net/core/filter.c        | 127 ++++++++++++++++++++++++++++++++++++++++++++++-
 4 files changed, 175 insertions(+), 3 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 819229c80eca..b38bfabc1fb5 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -672,6 +672,8 @@ extern const struct bpf_func_proto bpf_get_current_uid_gid_proto;
 extern const struct bpf_func_proto bpf_get_current_comm_proto;
 extern const struct bpf_func_proto bpf_skb_vlan_push_proto;
 extern const struct bpf_func_proto bpf_skb_vlan_pop_proto;
+extern const struct bpf_func_proto bpf_skb_mpls_push_proto;
+extern const struct bpf_func_proto bpf_skb_mpls_pop_proto;
 extern const struct bpf_func_proto bpf_get_stackid_proto;
 extern const struct bpf_func_proto bpf_sock_map_update_proto;
 
diff --git a/include/net/mpls.h b/include/net/mpls.h
index 2583dbc689b8..3a5b8c00823d 100644
--- a/include/net/mpls.h
+++ b/include/net/mpls.h
@@ -24,14 +24,50 @@ struct mpls_shim_hdr {
 	__be32 label_stack_entry;
 };
 
+struct mpls_entry_decoded {
+	u32 label;
+	u8 ttl;
+	u8 tc;
+	u8 bos;
+};
+
 static inline bool eth_p_mpls(__be16 eth_type)
 {
 	return eth_type == htons(ETH_P_MPLS_UC) ||
 		eth_type == htons(ETH_P_MPLS_MC);
 }
 
-static inline struct mpls_shim_hdr *mpls_hdr(const struct sk_buff *skb)
+static inline struct mpls_shim_hdr *skb_mpls_hdr(const struct sk_buff *skb)
 {
 	return (struct mpls_shim_hdr *)skb_network_header(skb);
 }
+
+static inline struct mpls_shim_hdr
+mpls_entry_encode(u32 label, unsigned int ttl, unsigned int tc, bool bos)
+{
+	struct mpls_shim_hdr result;
+
+	result.label_stack_entry =
+		cpu_to_be32((label << MPLS_LS_LABEL_SHIFT)
+            | (tc << MPLS_LS_TC_SHIFT)
+            | (bos ? (1 << MPLS_LS_S_SHIFT) : 0)
+            | (ttl << MPLS_LS_TTL_SHIFT));
+
+	return result;
+}
+
+static inline
+struct mpls_entry_decoded mpls_entry_decode(struct mpls_shim_hdr *hdr)
+{
+	struct mpls_entry_decoded result;
+	unsigned int entry = be32_to_cpu(hdr->label_stack_entry);
+
+	result.label = (entry & MPLS_LS_LABEL_MASK) >> MPLS_LS_LABEL_SHIFT;
+	result.ttl = (entry & MPLS_LS_TTL_MASK) >> MPLS_LS_TTL_SHIFT;
+	result.tc =  (entry & MPLS_LS_TC_MASK) >> MPLS_LS_TC_SHIFT;
+	result.bos = (entry & MPLS_LS_S_MASK) >> MPLS_LS_S_SHIFT;
+
+	return result;
+}
+
 #endif
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 18b7c510c511..2278548e1f8f 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -439,6 +439,12 @@ union bpf_attr {
  * int bpf_skb_vlan_pop(skb)
  *     Return: 0 on success or negative error
  *
+ * int bpf_skb_mpls_push(skb, num_lbls, lbls[])
+ *     Return 0 on success or negative error
+ *
+ * int bpf_skb_mpls_pop(skb, num_lbls)
+ *     Return number of popped labels, 0 is no-op, deliver packet to current dst
+ *
  * int bpf_skb_get_tunnel_key(skb, key, size, flags)
  * int bpf_skb_set_tunnel_key(skb, key, size, flags)
  *     retrieve or populate tunnel metadata
@@ -794,7 +800,10 @@ union bpf_attr {
 	FN(msg_redirect_map),		\
 	FN(msg_apply_bytes),		\
 	FN(msg_cork_bytes),		\
-	FN(msg_pull_data),
+	FN(msg_pull_data),		\
+	FN(skb_mpls_push),		\
+	FN(skb_mpls_pop),
+
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
diff --git a/net/core/filter.c b/net/core/filter.c
index 00f62fafc788..c96ae8ef423d 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2522,7 +2522,7 @@ static int bpf_skb_adjust_net(struct sk_buff *skb, s32 len_diff)
 }
 
 BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *, skb, s32, len_diff,
-	   u32, mode, u64, flags)
+				u32, mode, u64, flags)
 {
 	if (unlikely(flags))
 		return -EINVAL;
@@ -2542,6 +2542,125 @@ static const struct bpf_func_proto bpf_skb_adjust_room_proto = {
 	.arg4_type	= ARG_ANYTHING,
 };
 
+static int bpf_skb_mpls_net_grow(struct sk_buff *skb, int len_diff)
+{
+	u32 off = skb_mac_header_len(skb); /*LL_RESERVED_SPACE ?? */
+	int ret;
+
+	ret = skb_cow(skb, len_diff);
+	if (unlikely(ret < 0))
+		return ret;
+
+	skb_set_inner_protocol(skb, skb->protocol);
+	skb_reset_inner_network_header(skb);
+
+	ret = bpf_skb_generic_push(skb, off, len_diff);
+	if (unlikely(ret < 0))
+		return ret;
+
+	skb_reset_mac_header(skb);
+	skb_set_network_header(skb, ETH_HLEN);
+	skb->protocol = eth_hdr(skb)->h_proto = htons(ETH_P_MPLS_UC);
+
+	if (skb_is_gso(skb)) {
+/* Due to header grow, MSS needs to be downgraded. */
+		skb_shinfo(skb)->gso_size -= len_diff;
+/* Header must be checked, and gso_segs recomputed. */
+		skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
+		skb_shinfo(skb)->gso_segs = 0;
+	}
+
+	bpf_compute_data_pointers(skb);
+	return 0;
+}
+
+BPF_CALL_2(bpf_skb_mpls_pop, struct sk_buff*, skb, u32, num_lbls)
+{
+	u32 i = 0;
+	struct mpls_shim_hdr *hdr;
+	unsigned char *cursor;
+	struct mpls_entry_decoded dec;
+
+	if (num_lbls == 0)
+		return 0;
+
+	cursor = skb_network_header(skb);
+	do {
+		hdr = (struct mpls_shim_hdr *)cursor;
+		dec = mpls_entry_decode(hdr);
+		i++; cursor = cursor + sizeof(struct mpls_shim_hdr);
+	} while (dec.bos != 1 && i < num_lbls);
+
+	bpf_push_mac_rcsum(skb);
+	skb_pull(skb, i * sizeof(struct mpls_shim_hdr));
+	skb_reset_network_header(skb);
+
+	skb->protocol = eth_hdr(skb)->h_proto = htons(ETH_P_MPLS_UC);
+	bpf_pull_mac_rcsum(skb);
+	bpf_compute_data_pointers(skb);
+
+	return i;
+}
+
+const struct bpf_func_proto bpf_skb_mpls_pop_proto = {
+				.func   = bpf_skb_mpls_pop,
+				.gpl_only = false,
+				.ret_type = RET_INTEGER,
+				.arg1_type  = ARG_PTR_TO_CTX,
+				.arg2_type  = ARG_ANYTHING,
+};
+EXPORT_SYMBOL_GPL(bpf_skb_mpls_pop_proto);
+
+BPF_CALL_3(bpf_skb_mpls_push, struct sk_buff*, skb,
+	   __be32*, lbls, u32, num_lbls)
+{
+	int ret, i;
+	unsigned int new_header_size = num_lbls * sizeof(__be32);
+	unsigned int ttl = 255;
+	struct dst_entry *dst = skb_dst(skb);
+	struct net_device *out_dev = dst->dev;
+	struct mpls_shim_hdr *hdr;
+	bool bos;
+
+	/* Ensure there is enough space for the headers in the skb */
+	ret = bpf_skb_mpls_net_grow(skb, new_header_size);
+	if (ret < 0) {
+		trace_printk("COW was killed\n");
+		bpf_compute_data_pointers(skb);
+		return -ENOMEM;
+	}
+
+	skb->dev = out_dev;
+/* XXX this may need finesse to integrate with
+ * global TTL values for MPLS
+ */
+	if (dst->ops->family == AF_INET)
+		ttl = ip_hdr(skb)->ttl;
+	else if (dst->ops->family == AF_INET6)
+		ttl = ipv6_hdr(skb)->hop_limit;
+
+	/* Push the new labels */
+	hdr = skb_mpls_hdr(skb);
+	bos = true;
+	for (i = num_lbls - 1; i >= 0; i--) {
+		hdr[i] = mpls_entry_encode(lbls[i], ttl, 0, bos);
+		bos = false;
+	}
+
+	bpf_compute_data_pointers(skb);
+	return 0;
+}
+
+const struct bpf_func_proto bpf_skb_mpls_push_proto = {
+	.func		= bpf_skb_mpls_push,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type	= ARG_PTR_TO_MEM,
+	.arg3_type	= ARG_CONST_SIZE,
+};
+EXPORT_SYMBOL_GPL(bpf_skb_mpls_push_proto);
+
 static u32 __bpf_skb_min_len(const struct sk_buff *skb)
 {
 	u32 min_len = skb_network_offset(skb);
@@ -3019,6 +3138,8 @@ bool bpf_helper_changes_pkt_data(void *func)
 {
 	if (func == bpf_skb_vlan_push ||
 	    func == bpf_skb_vlan_pop ||
+	    func == bpf_skb_mpls_push ||
+	    func == bpf_skb_mpls_pop ||
 	    func == bpf_skb_store_bytes ||
 	    func == bpf_skb_change_proto ||
 	    func == bpf_skb_change_head ||
@@ -3682,6 +3803,10 @@ tc_cls_act_func_proto(enum bpf_func_id func_id)
 		return &bpf_skb_vlan_push_proto;
 	case BPF_FUNC_skb_vlan_pop:
 		return &bpf_skb_vlan_pop_proto;
+	case BPF_FUNC_skb_mpls_push:
+		return &bpf_skb_mpls_push_proto;
+	case BPF_FUNC_skb_mpls_pop:
+		return &bpf_skb_mpls_pop_proto;
 	case BPF_FUNC_skb_change_proto:
 		return &bpf_skb_change_proto_proto;
 	case BPF_FUNC_skb_change_type:
-- 
2.16.2.windows.1

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH bpf-next RFC 1/2] Organize MPLS headers
  2018-03-28 16:27 [PATCH bpf-next RFC 1/2] Organize MPLS headers Shrijeet Mukherjee
  2018-03-28 16:27 ` [PATCH bpf-next RFC 2/2] Add MPLS label push/pop functions for EBPF Shrijeet Mukherjee
@ 2018-03-28 18:20 ` David Ahern
  1 sibling, 0 replies; 5+ messages in thread
From: David Ahern @ 2018-03-28 18:20 UTC (permalink / raw)
  To: Shrijeet Mukherjee, daniel; +Cc: netdev, roopa, ast

On 3/28/18 10:27 AM, Shrijeet Mukherjee wrote:
> From: Shrijeet Mukherjee <shrijeet@gmail.com>
> 
> Prepare shared headers for new MPLS label push/pop EBPF helpers
> 
> Signed-off-by: Shrijeet Mukherjee <shm@cumulusnetworks.com>
> ---
>  include/net/mpls.h        |  1 +
>  net/core/filter.c         |  2 ++
>  net/mpls/af_mpls.c        |  8 ++++----
>  net/mpls/internal.h       | 31 -------------------------------
>  net/mpls/mpls_iptunnel.c  |  2 +-
>  net/openvswitch/actions.c | 12 ++++++------
>  6 files changed, 14 insertions(+), 42 deletions(-)
> 
> diff --git a/include/net/mpls.h b/include/net/mpls.h
> index 1dbc669b770e..2583dbc689b8 100644
> --- a/include/net/mpls.h
> +++ b/include/net/mpls.h
> @@ -16,6 +16,7 @@
>  
>  #include <linux/if_ether.h>
>  #include <linux/netdevice.h>
> +#include <uapi/linux/mpls.h>

drop the uapi; just include linux/mpls.h

>  
>  #define MPLS_HLEN 4
>  
> diff --git a/net/core/filter.c b/net/core/filter.c
> index c86f03fd9ea5..00f62fafc788 100644
> --- a/net/core/filter.c
> +++ b/net/core/filter.c
> @@ -29,6 +29,7 @@
>  #include <linux/sock_diag.h>
>  #include <linux/in.h>
>  #include <linux/inet.h>
> +#include <linux/mpls.h>
>  #include <linux/netdevice.h>
>  #include <linux/if_packet.h>
>  #include <linux/if_arp.h>
> @@ -56,6 +57,7 @@
>  #include <net/sock_reuseport.h>
>  #include <net/busy_poll.h>
>  #include <net/tcp.h>
> +#include <net/mpls.h>
>  #include <linux/bpf_trace.h>

Changes to this file are not needed with this patch.

>  
>  /**
> diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
> index d4a89a8be013..4e05391b77f0 100644
> --- a/net/mpls/af_mpls.c
> +++ b/net/mpls/af_mpls.c
> @@ -170,7 +170,7 @@ static u32 mpls_multipath_hash(struct mpls_route *rt, struct sk_buff *skb)
>  			break;
>  
>  		/* Read and decode the current label */
> -		hdr = mpls_hdr(skb) + label_index;
> +		hdr = skb_mpls_hdr(skb) + label_index;

No need to add skb_ prefix; no other protocol has the
skb_ prefix (ip_hdr, ipv6_hdr, eth_hdr, ...)


> diff --git a/net/mpls/internal.h b/net/mpls/internal.h
> index 768a302879b4..cd93cb201fed 100644
> --- a/net/mpls/internal.h
> +++ b/net/mpls/internal.h
> @@ -8,13 +8,6 @@
>   */
>  #define MAX_NEW_LABELS 30
>  
> -struct mpls_entry_decoded {
> -	u32 label;
> -	u8 ttl;
> -	u8 tc;
> -	u8 bos;
> -};
> -
>  struct mpls_pcpu_stats {
>  	struct mpls_link_stats	stats;
>  	struct u64_stats_sync	syncp;
> @@ -172,30 +165,6 @@ struct mpls_route { /* next hop label forwarding entry */
>  
>  #define endfor_nexthops(rt) }
>  
> -static inline struct mpls_shim_hdr mpls_entry_encode(u32 label, unsigned ttl, unsigned tc, bool bos)
> -{
> -	struct mpls_shim_hdr result;
> -	result.label_stack_entry =
> -		cpu_to_be32((label << MPLS_LS_LABEL_SHIFT) |
> -			    (tc << MPLS_LS_TC_SHIFT) |
> -			    (bos ? (1 << MPLS_LS_S_SHIFT) : 0) |
> -			    (ttl << MPLS_LS_TTL_SHIFT));
> -	return result;
> -}
> -
> -static inline struct mpls_entry_decoded mpls_entry_decode(struct mpls_shim_hdr *hdr)
> -{
> -	struct mpls_entry_decoded result;
> -	unsigned entry = be32_to_cpu(hdr->label_stack_entry);
> -
> -	result.label = (entry & MPLS_LS_LABEL_MASK) >> MPLS_LS_LABEL_SHIFT;
> -	result.ttl = (entry & MPLS_LS_TTL_MASK) >> MPLS_LS_TTL_SHIFT;
> -	result.tc =  (entry & MPLS_LS_TC_MASK) >> MPLS_LS_TC_SHIFT;
> -	result.bos = (entry & MPLS_LS_S_MASK) >> MPLS_LS_S_SHIFT;
> -
> -	return result;
> -}
> -
>  static inline struct mpls_dev *mpls_dev_get(const struct net_device *dev)
>  {
>  	return rcu_dereference_rtnl(dev->mpls_ptr);

With just this patch applied, MPLS can't compile since you are removing
these 2 functions.

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH bpf-next RFC 2/2] Add MPLS label push/pop functions for EBPF
  2018-03-28 16:27 ` [PATCH bpf-next RFC 2/2] Add MPLS label push/pop functions for EBPF Shrijeet Mukherjee
@ 2018-03-28 18:23   ` David Ahern
  2018-03-29 17:35     ` Shrijeet mukherjee
  0 siblings, 1 reply; 5+ messages in thread
From: David Ahern @ 2018-03-28 18:23 UTC (permalink / raw)
  To: Shrijeet Mukherjee, daniel; +Cc: netdev, roopa, ast

On 3/28/18 10:27 AM, Shrijeet Mukherjee wrote:

> diff --git a/include/net/mpls.h b/include/net/mpls.h
> index 2583dbc689b8..3a5b8c00823d 100644
> --- a/include/net/mpls.h
> +++ b/include/net/mpls.h
> @@ -24,14 +24,50 @@ struct mpls_shim_hdr {
>  	__be32 label_stack_entry;
>  };
>  
> +struct mpls_entry_decoded {
> +	u32 label;
> +	u8 ttl;
> +	u8 tc;
> +	u8 bos;
> +};
> +
>  static inline bool eth_p_mpls(__be16 eth_type)
>  {
>  	return eth_type == htons(ETH_P_MPLS_UC) ||
>  		eth_type == htons(ETH_P_MPLS_MC);
>  }
>  
> -static inline struct mpls_shim_hdr *mpls_hdr(const struct sk_buff *skb)
> +static inline struct mpls_shim_hdr *skb_mpls_hdr(const struct sk_buff *skb)
>  {
>  	return (struct mpls_shim_hdr *)skb_network_header(skb);
>  }
> +
> +static inline struct mpls_shim_hdr
> +mpls_entry_encode(u32 label, unsigned int ttl, unsigned int tc, bool bos)
> +{
> +	struct mpls_shim_hdr result;
> +
> +	result.label_stack_entry =
> +		cpu_to_be32((label << MPLS_LS_LABEL_SHIFT)
> +            | (tc << MPLS_LS_TC_SHIFT)
> +            | (bos ? (1 << MPLS_LS_S_SHIFT) : 0)
> +            | (ttl << MPLS_LS_TTL_SHIFT));
> +
> +	return result;
> +}
> +
> +static inline
> +struct mpls_entry_decoded mpls_entry_decode(struct mpls_shim_hdr *hdr)
> +{
> +	struct mpls_entry_decoded result;
> +	unsigned int entry = be32_to_cpu(hdr->label_stack_entry);
> +
> +	result.label = (entry & MPLS_LS_LABEL_MASK) >> MPLS_LS_LABEL_SHIFT;
> +	result.ttl = (entry & MPLS_LS_TTL_MASK) >> MPLS_LS_TTL_SHIFT;
> +	result.tc =  (entry & MPLS_LS_TC_MASK) >> MPLS_LS_TC_SHIFT;
> +	result.bos = (entry & MPLS_LS_S_MASK) >> MPLS_LS_S_SHIFT;
> +
> +	return result;
> +}
> +
>  #endif

The above should be in patch 1, though without the mpls_hdr rename.


> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index 18b7c510c511..2278548e1f8f 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -439,6 +439,12 @@ union bpf_attr {
>   * int bpf_skb_vlan_pop(skb)
>   *     Return: 0 on success or negative error
>   *
> + * int bpf_skb_mpls_push(skb, num_lbls, lbls[])
> + *     Return 0 on success or negative error
> + *
> + * int bpf_skb_mpls_pop(skb, num_lbls)
> + *     Return number of popped labels, 0 is no-op, deliver packet to current dst
> + *
>   * int bpf_skb_get_tunnel_key(skb, key, size, flags)
>   * int bpf_skb_set_tunnel_key(skb, key, size, flags)
>   *     retrieve or populate tunnel metadata
> @@ -794,7 +800,10 @@ union bpf_attr {
>  	FN(msg_redirect_map),		\
>  	FN(msg_apply_bytes),		\
>  	FN(msg_cork_bytes),		\
> -	FN(msg_pull_data),
> +	FN(msg_pull_data),		\
> +	FN(skb_mpls_push),		\
> +	FN(skb_mpls_pop),
> +
>  
>  /* integer value in 'imm' field of BPF_CALL instruction selects which helper
>   * function eBPF program intends to call
> diff --git a/net/core/filter.c b/net/core/filter.c
> index 00f62fafc788..c96ae8ef423d 100644
> --- a/net/core/filter.c
> +++ b/net/core/filter.c
> @@ -2522,7 +2522,7 @@ static int bpf_skb_adjust_net(struct sk_buff *skb, s32 len_diff)
>  }
>  
>  BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *, skb, s32, len_diff,
> -	   u32, mode, u64, flags)
> +				u32, mode, u64, flags)

drop the above whitespace change


>  {
>  	if (unlikely(flags))
>  		return -EINVAL;
> @@ -2542,6 +2542,125 @@ static const struct bpf_func_proto bpf_skb_adjust_room_proto = {
>  	.arg4_type	= ARG_ANYTHING,
>  };
>  
> +static int bpf_skb_mpls_net_grow(struct sk_buff *skb, int len_diff)
> +{
> +	u32 off = skb_mac_header_len(skb); /*LL_RESERVED_SPACE ?? */
> +	int ret;
> +
> +	ret = skb_cow(skb, len_diff);
> +	if (unlikely(ret < 0))
> +		return ret;
> +
> +	skb_set_inner_protocol(skb, skb->protocol);
> +	skb_reset_inner_network_header(skb);
> +
> +	ret = bpf_skb_generic_push(skb, off, len_diff);
> +	if (unlikely(ret < 0))
> +		return ret;
> +
> +	skb_reset_mac_header(skb);
> +	skb_set_network_header(skb, ETH_HLEN);
> +	skb->protocol = eth_hdr(skb)->h_proto = htons(ETH_P_MPLS_UC);
> +
> +	if (skb_is_gso(skb)) {
> +/* Due to header grow, MSS needs to be downgraded. */
> +		skb_shinfo(skb)->gso_size -= len_diff;
> +/* Header must be checked, and gso_segs recomputed. */
> +		skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
> +		skb_shinfo(skb)->gso_segs = 0;
> +	}
> +
> +	bpf_compute_data_pointers(skb);
> +	return 0;
> +}

The only thing MPLS specific is the protocol setting. Seems to me you
could leverage the existing bpf_skb_net_grow or even bpf_skb_adjust_net
and then set the protocols and headers in the caller of this function.
You already do something similar in bpf_skb_mpls_pop.


> +
> +BPF_CALL_2(bpf_skb_mpls_pop, struct sk_buff*, skb, u32, num_lbls)
> +{
> +	u32 i = 0;
> +	struct mpls_shim_hdr *hdr;
> +	unsigned char *cursor;
> +	struct mpls_entry_decoded dec;
> +
> +	if (num_lbls == 0)
> +		return 0;
> +
> +	cursor = skb_network_header(skb);
> +	do {
> +		hdr = (struct mpls_shim_hdr *)cursor;
> +		dec = mpls_entry_decode(hdr);
> +		i++; cursor = cursor + sizeof(struct mpls_shim_hdr);
> +	} while (dec.bos != 1 && i < num_lbls);
> +
> +	bpf_push_mac_rcsum(skb);
> +	skb_pull(skb, i * sizeof(struct mpls_shim_hdr));
> +	skb_reset_network_header(skb);
> +
> +	skb->protocol = eth_hdr(skb)->h_proto = htons(ETH_P_MPLS_UC);

If I pop 1 label and there are more of them, then the protocol does not
need to be adjusted -- it was already set to MPLS.

If I pop all of the labels, then the protocol is not MPLS, but whatever
the inner packet is -- IPv4 or IPv6 or other.


> +	bpf_pull_mac_rcsum(skb);
> +	bpf_compute_data_pointers(skb);
> +
> +	return i;
> +}
> +
> +const struct bpf_func_proto bpf_skb_mpls_pop_proto = {
> +				.func   = bpf_skb_mpls_pop,
> +				.gpl_only = false,
> +				.ret_type = RET_INTEGER,
> +				.arg1_type  = ARG_PTR_TO_CTX,
> +				.arg2_type  = ARG_ANYTHING,
> +};
> +EXPORT_SYMBOL_GPL(bpf_skb_mpls_pop_proto);
> +
> +BPF_CALL_3(bpf_skb_mpls_push, struct sk_buff*, skb,
> +	   __be32*, lbls, u32, num_lbls)
> +{
> +	int ret, i;
> +	unsigned int new_header_size = num_lbls * sizeof(__be32);
> +	unsigned int ttl = 255;
> +	struct dst_entry *dst = skb_dst(skb);

dst is not always set. e.g., any program on ingress prior to the layer 3
will not have it.


> +	struct net_device *out_dev = dst->dev;
> +	struct mpls_shim_hdr *hdr;
> +	bool bos;
> +
> +	/* Ensure there is enough space for the headers in the skb */
> +	ret = bpf_skb_mpls_net_grow(skb, new_header_size);
> +	if (ret < 0) {
> +		trace_printk("COW was killed\n");
> +		bpf_compute_data_pointers(skb);
> +		return -ENOMEM;
> +	}
> +
> +	skb->dev = out_dev;
> +/* XXX this may need finesse to integrate with
> + * global TTL values for MPLS
> + */
> +	if (dst->ops->family == AF_INET)
> +		ttl = ip_hdr(skb)->ttl;
> +	else if (dst->ops->family == AF_INET6)
> +		ttl = ipv6_hdr(skb)->hop_limit;
> +
> +	/* Push the new labels */
> +	hdr = skb_mpls_hdr(skb);
> +	bos = true;
> +	for (i = num_lbls - 1; i >= 0; i--) {
> +		hdr[i] = mpls_entry_encode(lbls[i], ttl, 0, bos);
> +		bos = false;
> +	}
> +
> +	bpf_compute_data_pointers(skb);
> +	return 0;
> +}
> +
> +const struct bpf_func_proto bpf_skb_mpls_push_proto = {
> +	.func		= bpf_skb_mpls_push,
> +	.gpl_only	= false,
> +	.ret_type	= RET_INTEGER,
> +	.arg1_type	= ARG_PTR_TO_CTX,
> +	.arg2_type	= ARG_PTR_TO_MEM,
> +	.arg3_type	= ARG_CONST_SIZE,
> +};
> +EXPORT_SYMBOL_GPL(bpf_skb_mpls_push_proto);
> +
>  static u32 __bpf_skb_min_len(const struct sk_buff *skb)
>  {
>  	u32 min_len = skb_network_offset(skb);
> @@ -3019,6 +3138,8 @@ bool bpf_helper_changes_pkt_data(void *func)
>  {
>  	if (func == bpf_skb_vlan_push ||
>  	    func == bpf_skb_vlan_pop ||
> +	    func == bpf_skb_mpls_push ||
> +	    func == bpf_skb_mpls_pop ||
>  	    func == bpf_skb_store_bytes ||
>  	    func == bpf_skb_change_proto ||
>  	    func == bpf_skb_change_head ||
> @@ -3682,6 +3803,10 @@ tc_cls_act_func_proto(enum bpf_func_id func_id)
>  		return &bpf_skb_vlan_push_proto;
>  	case BPF_FUNC_skb_vlan_pop:
>  		return &bpf_skb_vlan_pop_proto;
> +	case BPF_FUNC_skb_mpls_push:
> +		return &bpf_skb_mpls_push_proto;
> +	case BPF_FUNC_skb_mpls_pop:
> +		return &bpf_skb_mpls_pop_proto;
>  	case BPF_FUNC_skb_change_proto:
>  		return &bpf_skb_change_proto_proto;
>  	case BPF_FUNC_skb_change_type:
> 

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH bpf-next RFC 2/2] Add MPLS label push/pop functions for EBPF
  2018-03-28 18:23   ` David Ahern
@ 2018-03-29 17:35     ` Shrijeet mukherjee
  0 siblings, 0 replies; 5+ messages in thread
From: Shrijeet mukherjee @ 2018-03-29 17:35 UTC (permalink / raw)
  To: David Ahern; +Cc: daniel, netdev, roopa, ast

On 03/28, David Ahern wrote:
> On 3/28/18 10:27 AM, Shrijeet Mukherjee wrote:
> 
> > +static int bpf_skb_mpls_net_grow(struct sk_buff *skb, int len_diff)
> > +{
> > +	u32 off = skb_mac_header_len(skb); /*LL_RESERVED_SPACE ?? */
> > +	int ret;
> > +
> > +	ret = skb_cow(skb, len_diff);
> > +	if (unlikely(ret < 0))
> > +		return ret;
> > +
> > +	skb_set_inner_protocol(skb, skb->protocol);
> > +	skb_reset_inner_network_header(skb);
> > +
> > +	ret = bpf_skb_generic_push(skb, off, len_diff);
> > +	if (unlikely(ret < 0))
> > +		return ret;
> > +
> > +	skb_reset_mac_header(skb);
> > +	skb_set_network_header(skb, ETH_HLEN);
> > +	skb->protocol = eth_hdr(skb)->h_proto = htons(ETH_P_MPLS_UC);
> > +
> > +	if (skb_is_gso(skb)) {
> > +/* Due to header grow, MSS needs to be downgraded. */
> > +		skb_shinfo(skb)->gso_size -= len_diff;
> > +/* Header must be checked, and gso_segs recomputed. */
> > +		skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
> > +		skb_shinfo(skb)->gso_segs = 0;
> > +	}
> > +
> > +	bpf_compute_data_pointers(skb);
> > +	return 0;
> > +}
> 
> The only thing MPLS specific is the protocol setting. Seems to me you
> could leverage the existing bpf_skb_net_grow or even bpf_skb_adjust_net
> and then set the protocols and headers in the caller of this function.
> You already do something similar in bpf_skb_mpls_pop.
> 

At a high level, this is the discussion I want to have. There is a loss of efficiency if we try to make generic functions and there are differences which are subtle as MPLS does not have an encompassing header. So, do we want the filter functions to become more common (and I can generate a patch to that point) or do we want them to be hyper-efficient is the question I am seeking an answer to.

Will address all the other comments in v2
> 
> > +
> > +BPF_CALL_2(bpf_skb_mpls_pop, struct sk_buff*, skb, u32, num_lbls)
> > +{
> > +	u32 i = 0;
> > +	struct mpls_shim_hdr *hdr;
> > +	unsigned char *cursor;
> > +	struct mpls_entry_decoded dec;
> > +
> > +	if (num_lbls == 0)
> > +		return 0;
> > +
> > +	cursor = skb_network_header(skb);
> > +	do {
> > +		hdr = (struct mpls_shim_hdr *)cursor;
> > +		dec = mpls_entry_decode(hdr);
> > +		i++; cursor = cursor + sizeof(struct mpls_shim_hdr);
> > +	} while (dec.bos != 1 && i < num_lbls);
> > +
> > +	bpf_push_mac_rcsum(skb);
> > +	skb_pull(skb, i * sizeof(struct mpls_shim_hdr));
> > +	skb_reset_network_header(skb);
> > +
> > +	skb->protocol = eth_hdr(skb)->h_proto = htons(ETH_P_MPLS_UC);
> 
> If I pop 1 label and there are more of them, then the protocol does not
> need to be adjusted -- it was already set to MPLS.
> 
> If I pop all of the labels, then the protocol is not MPLS, but whatever
> the inner packet is -- IPv4 or IPv6 or other.

This is the other real question. The filter functions do not allow a generic array to be returned, so the current expectation is that the callee has parsed the full MPLS label stack and provides the correct number. This could mean that MPLS pop is just a generic reduce_hdr_by function and the protocol etc is handled by the bpf program. I don't like that as it will mean incoherent protocol handling in bpf programs. But wanted to hear opinions.

> 
> 
> > +	bpf_pull_mac_rcsum(skb);
> > +	bpf_compute_data_pointers(skb);
> > +
> > +	return i;
> > +}
> > +
> > +const struct bpf_func_proto bpf_skb_mpls_pop_proto = {
> > +				.func   = bpf_skb_mpls_pop,
> > +				.gpl_only = false,
> > +				.ret_type = RET_INTEGER,
> > +				.arg1_type  = ARG_PTR_TO_CTX,
> > +				.arg2_type  = ARG_ANYTHING,
> > +};
> > +EXPORT_SYMBOL_GPL(bpf_skb_mpls_pop_proto);
> > +
> > +BPF_CALL_3(bpf_skb_mpls_push, struct sk_buff*, skb,
> > +	   __be32*, lbls, u32, num_lbls)
> > +{
> > +	int ret, i;
> > +	unsigned int new_header_size = num_lbls * sizeof(__be32);
> > +	unsigned int ttl = 255;
> > +	struct dst_entry *dst = skb_dst(skb);
> 
> dst is not always set. e.g., any program on ingress prior to the layer 3
> will not have it.
> 
> 
> > +	struct net_device *out_dev = dst->dev;
> > +	struct mpls_shim_hdr *hdr;
> > +	bool bos;
> > +
> > +	/* Ensure there is enough space for the headers in the skb */
> > +	ret = bpf_skb_mpls_net_grow(skb, new_header_size);
> > +	if (ret < 0) {
> > +		trace_printk("COW was killed\n");
> > +		bpf_compute_data_pointers(skb);
> > +		return -ENOMEM;
> > +	}
> > +
> > +	skb->dev = out_dev;
> > +/* XXX this may need finesse to integrate with
> > + * global TTL values for MPLS
> > + */
> > +	if (dst->ops->family == AF_INET)
> > +		ttl = ip_hdr(skb)->ttl;
> > +	else if (dst->ops->family == AF_INET6)
> > +		ttl = ipv6_hdr(skb)->hop_limit;
> > +
> > +	/* Push the new labels */
> > +	hdr = skb_mpls_hdr(skb);
> > +	bos = true;
> > +	for (i = num_lbls - 1; i >= 0; i--) {
> > +		hdr[i] = mpls_entry_encode(lbls[i], ttl, 0, bos);
> > +		bos = false;
> > +	}
> > +
> > +	bpf_compute_data_pointers(skb);
> > +	return 0;
> > +}
> > +
> > +const struct bpf_func_proto bpf_skb_mpls_push_proto = {
> > +	.func		= bpf_skb_mpls_push,
> > +	.gpl_only	= false,
> > +	.ret_type	= RET_INTEGER,
> > +	.arg1_type	= ARG_PTR_TO_CTX,
> > +	.arg2_type	= ARG_PTR_TO_MEM,
> > +	.arg3_type	= ARG_CONST_SIZE,
> > +};
> > +EXPORT_SYMBOL_GPL(bpf_skb_mpls_push_proto);
> > +
> >  static u32 __bpf_skb_min_len(const struct sk_buff *skb)
> >  {
> >  	u32 min_len = skb_network_offset(skb);
> > @@ -3019,6 +3138,8 @@ bool bpf_helper_changes_pkt_data(void *func)
> >  {
> >  	if (func == bpf_skb_vlan_push ||
> >  	    func == bpf_skb_vlan_pop ||
> > +	    func == bpf_skb_mpls_push ||
> > +	    func == bpf_skb_mpls_pop ||
> >  	    func == bpf_skb_store_bytes ||
> >  	    func == bpf_skb_change_proto ||
> >  	    func == bpf_skb_change_head ||
> > @@ -3682,6 +3803,10 @@ tc_cls_act_func_proto(enum bpf_func_id func_id)
> >  		return &bpf_skb_vlan_push_proto;
> >  	case BPF_FUNC_skb_vlan_pop:
> >  		return &bpf_skb_vlan_pop_proto;
> > +	case BPF_FUNC_skb_mpls_push:
> > +		return &bpf_skb_mpls_push_proto;
> > +	case BPF_FUNC_skb_mpls_pop:
> > +		return &bpf_skb_mpls_pop_proto;
> >  	case BPF_FUNC_skb_change_proto:
> >  		return &bpf_skb_change_proto_proto;
> >  	case BPF_FUNC_skb_change_type:
> > 
> 

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2018-03-29 17:35 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-03-28 16:27 [PATCH bpf-next RFC 1/2] Organize MPLS headers Shrijeet Mukherjee
2018-03-28 16:27 ` [PATCH bpf-next RFC 2/2] Add MPLS label push/pop functions for EBPF Shrijeet Mukherjee
2018-03-28 18:23   ` David Ahern
2018-03-29 17:35     ` Shrijeet mukherjee
2018-03-28 18:20 ` [PATCH bpf-next RFC 1/2] Organize MPLS headers David Ahern

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.