All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH net-next v2] openvswitch: enable NSH support
@ 2017-08-10 13:21 Yi Yang
       [not found] ` <1502371275-52446-1-git-send-email-yi.y.yang-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
  0 siblings, 1 reply; 17+ messages in thread
From: Yi Yang @ 2017-08-10 13:21 UTC (permalink / raw)
  To: netdev; +Cc: dev, blp, jbenc, Yi Yang

OVS master and 2.8 branch has merged NSH userspace
patch series, this patch is to enable NSH support
in kernel data path in order that OVS can support
NSH in 2.8 release in compat mode by porting this.

Signed-off-by: Yi Yang <yi.y.yang@intel.com>
---
 drivers/net/vxlan.c              |   7 ++
 include/net/nsh.h                | 126 ++++++++++++++++++++++++++++++
 include/uapi/linux/openvswitch.h |  33 ++++++++
 net/openvswitch/actions.c        | 165 +++++++++++++++++++++++++++++++++++++++
 net/openvswitch/flow.c           |  41 ++++++++++
 net/openvswitch/flow.h           |   1 +
 net/openvswitch/flow_netlink.c   |  54 ++++++++++++-
 7 files changed, 426 insertions(+), 1 deletion(-)
 create mode 100644 include/net/nsh.h

diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index dbca067..843714c 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -27,6 +27,7 @@
 #include <net/net_namespace.h>
 #include <net/netns/generic.h>
 #include <net/vxlan.h>
+#include <net/nsh.h>
 
 #if IS_ENABLED(CONFIG_IPV6)
 #include <net/ip6_tunnel.h>
@@ -1267,6 +1268,9 @@ static bool vxlan_parse_gpe_hdr(struct vxlanhdr *unparsed,
 	case VXLAN_GPE_NP_IPV6:
 		*protocol = htons(ETH_P_IPV6);
 		break;
+	case VXLAN_GPE_NP_NSH:
+		*protocol = htons(ETH_P_NSH);
+		break;
 	case VXLAN_GPE_NP_ETHERNET:
 		*protocol = htons(ETH_P_TEB);
 		break;
@@ -1806,6 +1810,9 @@ static int vxlan_build_gpe_hdr(struct vxlanhdr *vxh, u32 vxflags,
 	case htons(ETH_P_IPV6):
 		gpe->next_protocol = VXLAN_GPE_NP_IPV6;
 		return 0;
+	case htons(ETH_P_NSH):
+		gpe->next_protocol = VXLAN_GPE_NP_NSH;
+		return 0;
 	case htons(ETH_P_TEB):
 		gpe->next_protocol = VXLAN_GPE_NP_ETHERNET;
 		return 0;
diff --git a/include/net/nsh.h b/include/net/nsh.h
new file mode 100644
index 0000000..96477a1
--- /dev/null
+++ b/include/net/nsh.h
@@ -0,0 +1,126 @@
+#ifndef __NET_NSH_H
+#define __NET_NSH_H 1
+
+
+/*
+ * Network Service Header:
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |Ver|O|C|R|R|R|R|R|R|    Length   |   MD Type   |  Next Proto   |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |                Service Path ID                | Service Index |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |                                                               |
+ * ~               Mandatory/Optional Context Header               ~
+ * |                                                               |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * Ver = The version field is used to ensure backward compatibility
+ *       going forward with future NSH updates.  It MUST be set to 0x0
+ *       by the sender, in this first revision of NSH.
+ *
+ * O = OAM. when set to 0x1 indicates that this packet is an operations
+ *     and management (OAM) packet.  The receiving SFF and SFs nodes
+ *     MUST examine the payload and take appropriate action.
+ *
+ * C = context. Indicates that a critical metadata TLV is present.
+ *
+ * Length : total length, in 4-byte words, of NSH including the Base
+ *          Header, the Service Path Header and the optional variable
+ *          TLVs.
+ * MD Type: indicates the format of NSH beyond the mandatory Base Header
+ *          and the Service Path Header.
+ *
+ * Next Protocol: indicates the protocol type of the original packet. A
+ *          new IANA registry will be created for protocol type.
+ *
+ * Service Path Identifier (SPI): identifies a service path.
+ *          Participating nodes MUST use this identifier for Service
+ *          Function Path selection.
+ *
+ * Service Index (SI): provides location within the SFP.
+ *
+ * [0] https://tools.ietf.org/html/draft-ietf-sfc-nsh-13
+ */
+
+/**
+ * struct nsh_md1_ctx - Keeps track of NSH context data
+ * @nshc<1-4>: NSH Contexts.
+ */
+struct nsh_md1_ctx {
+	__be32 c[4];
+};
+
+struct nsh_md2_tlv {
+	__be16 md_class;
+	u8 type;
+	u8 length;
+	u8 md_value[];
+};
+
+struct nsh_hdr {
+	__be16 ver_flags_len;
+	u8 md_type;
+	u8 next_proto;
+	__be32 path_hdr;
+	union {
+	    struct nsh_md1_ctx md1;
+	    struct nsh_md2_tlv md2[0];
+	};
+};
+
+/* Masking NSH header fields. */
+#define NSH_VER_MASK       0xc000
+#define NSH_VER_SHIFT      14
+#define NSH_FLAGS_MASK     0x3fc0
+#define NSH_FLAGS_SHIFT    6
+#define NSH_LEN_MASK       0x003f
+#define NSH_LEN_SHIFT      0
+
+#define NSH_SPI_MASK       0xffffff00
+#define NSH_SPI_SHIFT      8
+#define NSH_SI_MASK        0x000000ff
+#define NSH_SI_SHIFT       0
+
+#define NSH_DST_PORT    4790     /* UDP Port for NSH on VXLAN. */
+#define ETH_P_NSH       0x894F   /* Ethertype for NSH. */
+
+/* NSH Base Header Next Protocol. */
+#define NSH_P_IPV4        0x01
+#define NSH_P_IPV6        0x02
+#define NSH_P_ETHERNET    0x03
+#define NSH_P_NSH         0x04
+#define NSH_P_MPLS        0x05
+
+/* MD Type Registry. */
+#define NSH_M_TYPE1     0x01
+#define NSH_M_TYPE2     0x02
+#define NSH_M_EXP1      0xFE
+#define NSH_M_EXP2      0xFF
+
+/* NSH Metadata Length. */
+#define NSH_M_TYPE1_MDLEN 16
+
+/* NSH Base Header Length */
+#define NSH_BASE_HDR_LEN  8
+
+/* NSH MD Type 1 header Length. */
+#define NSH_M_TYPE1_LEN   24
+
+static inline u16
+nsh_hdr_len(const struct nsh_hdr *nsh)
+{
+	return 4 * (ntohs(nsh->ver_flags_len) & NSH_LEN_MASK) >> NSH_LEN_SHIFT;
+}
+
+static inline struct nsh_md1_ctx *
+nsh_md1_ctx(struct nsh_hdr *nsh)
+{
+	return &nsh->md1;
+}
+
+static inline struct nsh_md2_tlv *
+nsh_md2_ctx(struct nsh_hdr *nsh)
+{
+	return nsh->md2;
+}
+
+#endif /* __NET_NSH_H */
diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h
index 156ee4c..5a1c94b 100644
--- a/include/uapi/linux/openvswitch.h
+++ b/include/uapi/linux/openvswitch.h
@@ -333,6 +333,7 @@ enum ovs_key_attr {
 	OVS_KEY_ATTR_CT_LABELS,	/* 16-octet connection tracking label */
 	OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4,   /* struct ovs_key_ct_tuple_ipv4 */
 	OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6,   /* struct ovs_key_ct_tuple_ipv6 */
+	OVS_KEY_ATTR_NSH,                  /* struct ovs_key_nsh */
 
 #ifdef __KERNEL__
 	OVS_KEY_ATTR_TUNNEL_INFO,  /* struct ip_tunnel_info */
@@ -491,6 +492,15 @@ struct ovs_key_ct_tuple_ipv6 {
 	__u8   ipv6_proto;
 };
 
+struct ovs_key_nsh {
+	__u8 flags;
+	__u8 mdtype;
+	__u8 np;
+	__u8 pad;
+	__be32 path_hdr;
+	__be32 c[4];
+};
+
 /**
  * enum ovs_flow_attr - attributes for %OVS_FLOW_* commands.
  * @OVS_FLOW_ATTR_KEY: Nested %OVS_KEY_ATTR_* attributes specifying the flow
@@ -769,6 +779,25 @@ struct ovs_action_push_eth {
 	struct ovs_key_ethernet addresses;
 };
 
+#define OVS_PUSH_NSH_MAX_MD_LEN 248
+/*
+ * struct ovs_action_push_nsh - %OVS_ACTION_ATTR_PUSH_NSH
+ * @flags: NSH header flags.
+ * @mdtype: NSH metadata type.
+ * @mdlen: Length of NSH metadata in bytes.
+ * @np: NSH next_protocol: Inner packet type.
+ * @path_hdr: NSH service path id and service index.
+ * @metadata: NSH metadata for MD type 1 or 2
+ */
+struct ovs_action_push_nsh {
+	__u8 flags;
+	__u8 mdtype;
+	__u8 mdlen;
+	__u8 np;
+	__be32 path_hdr;
+	__u8 metadata[];
+};
+
 /**
  * enum ovs_action_attr - Action types.
  *
@@ -806,6 +835,8 @@ struct ovs_action_push_eth {
  * packet.
  * @OVS_ACTION_ATTR_POP_ETH: Pop the outermost Ethernet header off the
  * packet.
+ * @OVS_ACTION_ATTR_PUSH_NSH: push NSH header to the packet.
+ * @OVS_ACTION_ATTR_POP_NSH: pop the outermost NSH header off the packet.
  *
  * Only a single header can be set with a single %OVS_ACTION_ATTR_SET.  Not all
  * fields within a header are modifiable, e.g. the IPv4 protocol and fragment
@@ -835,6 +866,8 @@ enum ovs_action_attr {
 	OVS_ACTION_ATTR_TRUNC,        /* u32 struct ovs_action_trunc. */
 	OVS_ACTION_ATTR_PUSH_ETH,     /* struct ovs_action_push_eth. */
 	OVS_ACTION_ATTR_POP_ETH,      /* No argument. */
+	OVS_ACTION_ATTR_PUSH_NSH,     /* struct ovs_action_push_nsh. */
+	OVS_ACTION_ATTR_POP_NSH,      /* No argument. */
 
 	__OVS_ACTION_ATTR_MAX,	      /* Nothing past this will be accepted
 				       * from userspace. */
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index e461067..50f80bc 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -38,6 +38,7 @@
 #include <net/dsfield.h>
 #include <net/mpls.h>
 #include <net/sctp/checksum.h>
+#include <net/nsh.h>
 
 #include "datapath.h"
 #include "flow.h"
@@ -380,6 +381,114 @@ static int push_eth(struct sk_buff *skb, struct sw_flow_key *key,
 	return 0;
 }
 
+static int push_nsh(struct sk_buff *skb, struct sw_flow_key *key,
+		    const struct ovs_action_push_nsh *oapn)
+{
+	struct nsh_hdr *nsh;
+	size_t length = NSH_BASE_HDR_LEN + oapn->mdlen;
+	u8 next_proto;
+
+	if (key->mac_proto == MAC_PROTO_ETHERNET) {
+		next_proto = NSH_P_ETHERNET;
+	} else {
+		switch (ntohs(skb->protocol)) {
+		case ETH_P_IP:
+			next_proto = NSH_P_IPV4;
+			break;
+		case ETH_P_IPV6:
+			next_proto = NSH_P_IPV6;
+			break;
+		case ETH_P_NSH:
+			next_proto = NSH_P_NSH;
+			break;
+		default:
+			return -ENOTSUPP;
+		}
+	}
+
+	/* Add the NSH header */
+	if (skb_cow_head(skb, length) < 0)
+		return -ENOMEM;
+
+	skb_push(skb, length);
+	nsh = (struct nsh_hdr *)(skb->data);
+	nsh->ver_flags_len = htons((oapn->flags << NSH_FLAGS_SHIFT) |
+				 (length >> 2));
+	nsh->next_proto = next_proto;
+	nsh->path_hdr = oapn->path_hdr;
+	nsh->md_type = oapn->mdtype;
+	switch (nsh->md_type) {
+	case NSH_M_TYPE1:
+		nsh->md1 = *(struct nsh_md1_ctx *)oapn->metadata;
+		break;
+	case NSH_M_TYPE2: {
+		/* The MD2 metadata in oapn is already padded to 4 bytes. */
+		size_t len = DIV_ROUND_UP(oapn->mdlen, 4) * 4;
+
+		memcpy(nsh->md2, oapn->metadata, len);
+		break;
+	}
+	default:
+		return -ENOTSUPP;
+	}
+
+	if (!skb->inner_protocol)
+		skb_set_inner_protocol(skb, skb->protocol);
+
+	skb->protocol = htons(ETH_P_NSH);
+	key->eth.type = htons(ETH_P_NSH);
+	skb_reset_mac_header(skb);
+	skb_reset_mac_len(skb);
+
+	/* safe right before invalidate_flow_key */
+	key->mac_proto = MAC_PROTO_NONE;
+	invalidate_flow_key(key);
+	return 0;
+}
+
+static int pop_nsh(struct sk_buff *skb, struct sw_flow_key *key)
+{
+	struct nsh_hdr *nsh = (struct nsh_hdr *)(skb->data);
+	size_t length;
+	u16 inner_proto;
+
+	if (ovs_key_mac_proto(key) != MAC_PROTO_NONE ||
+	    skb->protocol != htons(ETH_P_NSH)) {
+		return -EINVAL;
+	}
+
+	switch (nsh->next_proto) {
+	case NSH_P_ETHERNET:
+		inner_proto = htons(ETH_P_TEB);
+		break;
+	case NSH_P_IPV4:
+		inner_proto = htons(ETH_P_IP);
+		break;
+	case NSH_P_IPV6:
+		inner_proto = htons(ETH_P_IPV6);
+		break;
+	case NSH_P_NSH:
+		inner_proto = htons(ETH_P_NSH);
+		break;
+	default:
+		return -ENOTSUPP;
+	}
+
+	length = nsh_hdr_len(nsh);
+	skb_pull(skb, length);
+	skb_reset_mac_header(skb);
+	skb_reset_mac_len(skb);
+	skb->protocol = inner_proto;
+
+	/* safe right before invalidate_flow_key */
+	if (inner_proto == htons(ETH_P_TEB))
+		key->mac_proto = MAC_PROTO_ETHERNET;
+	else
+		key->mac_proto = MAC_PROTO_NONE;
+	invalidate_flow_key(key);
+	return 0;
+}
+
 static void update_ip_l4_checksum(struct sk_buff *skb, struct iphdr *nh,
 				  __be32 addr, __be32 new_addr)
 {
@@ -602,6 +711,49 @@ static int set_ipv6(struct sk_buff *skb, struct sw_flow_key *flow_key,
 	return 0;
 }
 
+static int set_nsh(struct sk_buff *skb, struct sw_flow_key *flow_key,
+		   const struct ovs_key_nsh *key,
+		   const struct ovs_key_nsh *mask)
+{
+	struct nsh_hdr *nsh;
+	int err;
+	u8 flags;
+	int i;
+
+	err = skb_ensure_writable(skb, skb_network_offset(skb) +
+				  sizeof(struct nsh_hdr));
+	if (unlikely(err))
+		return err;
+
+	nsh = (struct nsh_hdr *)skb_network_header(skb);
+
+	flags = (ntohs(nsh->ver_flags_len) & NSH_FLAGS_MASK) >>
+		NSH_FLAGS_SHIFT;
+	flags = OVS_MASKED(flags, key->flags, mask->flags);
+	flow_key->nsh.flags = flags;
+	nsh->ver_flags_len = htons(flags << NSH_FLAGS_SHIFT) |
+			     (nsh->ver_flags_len & ~htons(NSH_FLAGS_MASK));
+	nsh->path_hdr = OVS_MASKED(nsh->path_hdr, key->path_hdr,
+				   mask->path_hdr);
+	flow_key->nsh.path_hdr = nsh->path_hdr;
+	switch (nsh->md_type) {
+	case NSH_M_TYPE1:
+		for (i = 0; i < 4; i++) {
+			nsh->md1.c[i] =
+			    OVS_MASKED(nsh->md1.c[i], key->c[i], mask->c[i]);
+			flow_key->nsh.c[i] = nsh->md1.c[i];
+		}
+		break;
+	case NSH_M_TYPE2:
+		for (i = 0; i < 4; i++)
+			flow_key->nsh.c[i] = 0;
+		break;
+	default:
+		return -EINVAL;
+	}
+	return 0;
+}
+
 /* Must follow skb_ensure_writable() since that can move the skb data. */
 static void set_tp_port(struct sk_buff *skb, __be16 *port,
 			__be16 new_port, __sum16 *check)
@@ -1024,6 +1176,11 @@ static int execute_masked_set_action(struct sk_buff *skb,
 				   get_mask(a, struct ovs_key_ethernet *));
 		break;
 
+	case OVS_KEY_ATTR_NSH:
+		err = set_nsh(skb, flow_key, nla_data(a),
+			      get_mask(a, struct ovs_key_nsh *));
+		break;
+
 	case OVS_KEY_ATTR_IPV4:
 		err = set_ipv4(skb, flow_key, nla_data(a),
 			       get_mask(a, struct ovs_key_ipv4 *));
@@ -1210,6 +1367,14 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
 		case OVS_ACTION_ATTR_POP_ETH:
 			err = pop_eth(skb, key);
 			break;
+
+		case OVS_ACTION_ATTR_PUSH_NSH:
+			err = push_nsh(skb, key, nla_data(a));
+			break;
+
+		case OVS_ACTION_ATTR_POP_NSH:
+			err = pop_nsh(skb, key);
+			break;
 		}
 
 		if (unlikely(err)) {
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index 8c94cef..dc8631c 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -46,6 +46,7 @@
 #include <net/ipv6.h>
 #include <net/mpls.h>
 #include <net/ndisc.h>
+#include <net/nsh.h>
 
 #include "conntrack.h"
 #include "datapath.h"
@@ -490,6 +491,42 @@ static int parse_icmpv6(struct sk_buff *skb, struct sw_flow_key *key,
 	return 0;
 }
 
+static int parse_nsh(struct sk_buff *skb, struct sw_flow_key *key)
+{
+	struct nsh_hdr *nsh = (struct nsh_hdr *)skb_network_header(skb);
+	u16 ver_flags_len;
+	u8 version, length;
+	u32 path_hdr;
+	int i;
+
+	memset(&key->nsh, 0, sizeof(struct ovs_key_nsh));
+	ver_flags_len = ntohs(nsh->ver_flags_len);
+	version = (ver_flags_len & NSH_VER_MASK) >> NSH_VER_SHIFT;
+	length = (ver_flags_len & NSH_LEN_MASK) >> NSH_LEN_SHIFT;
+
+	key->nsh.flags = (ver_flags_len & NSH_FLAGS_MASK) >> NSH_FLAGS_SHIFT;
+	key->nsh.mdtype = nsh->md_type;
+	key->nsh.np = nsh->next_proto;
+	path_hdr = ntohl(nsh->path_hdr);
+	key->nsh.path_hdr = nsh->path_hdr;
+	switch (key->nsh.mdtype) {
+	case NSH_M_TYPE1:
+		if ((length << 2) != NSH_M_TYPE1_LEN)
+			return -EINVAL;
+
+		for (i = 0; i < 4; i++)
+			key->nsh.c[i] = nsh->md1.c[i];
+
+		break;
+	case NSH_M_TYPE2:
+		/* Don't support MD type 2 yet */
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
 /**
  * key_extract - extracts a flow key from an Ethernet frame.
  * @skb: sk_buff that contains the frame, with skb->data pointing to the
@@ -735,6 +772,10 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
 				memset(&key->tp, 0, sizeof(key->tp));
 			}
 		}
+	} else if (key->eth.type == htons(ETH_P_NSH)) {
+		error = parse_nsh(skb, key);
+		if (error)
+			return error;
 	}
 	return 0;
 }
diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h
index 1875bba..d2a0e56 100644
--- a/net/openvswitch/flow.h
+++ b/net/openvswitch/flow.h
@@ -144,6 +144,7 @@ struct sw_flow_key {
 			};
 		} ipv6;
 	};
+	struct ovs_key_nsh nsh;         /* network service header */
 	struct {
 		/* Connection tracking fields not packed above. */
 		struct {
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index f07d10a..de96839 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -48,6 +48,7 @@
 #include <net/ndisc.h>
 #include <net/mpls.h>
 #include <net/vxlan.h>
+#include <net/nsh.h>
 
 #include "flow_netlink.h"
 
@@ -76,9 +77,11 @@ static bool actions_may_change_flow(const struct nlattr *actions)
 
 		case OVS_ACTION_ATTR_CT:
 		case OVS_ACTION_ATTR_HASH:
+		case OVS_ACTION_ATTR_POP_NSH:
 		case OVS_ACTION_ATTR_POP_ETH:
 		case OVS_ACTION_ATTR_POP_MPLS:
 		case OVS_ACTION_ATTR_POP_VLAN:
+		case OVS_ACTION_ATTR_PUSH_NSH:
 		case OVS_ACTION_ATTR_PUSH_ETH:
 		case OVS_ACTION_ATTR_PUSH_MPLS:
 		case OVS_ACTION_ATTR_PUSH_VLAN:
@@ -327,7 +330,7 @@ size_t ovs_key_attr_size(void)
 	/* Whenever adding new OVS_KEY_ FIELDS, we should consider
 	 * updating this function.
 	 */
-	BUILD_BUG_ON(OVS_KEY_ATTR_TUNNEL_INFO != 28);
+	BUILD_BUG_ON(OVS_KEY_ATTR_TUNNEL_INFO != 29);
 
 	return    nla_total_size(4)   /* OVS_KEY_ATTR_PRIORITY */
 		+ nla_total_size(0)   /* OVS_KEY_ATTR_TUNNEL */
@@ -341,6 +344,7 @@ size_t ovs_key_attr_size(void)
 		+ nla_total_size(4)   /* OVS_KEY_ATTR_CT_MARK */
 		+ nla_total_size(16)  /* OVS_KEY_ATTR_CT_LABELS */
 		+ nla_total_size(40)  /* OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6 */
+		+ nla_total_size(24)  /* OVS_KEY_ATTR_NSH */
 		+ nla_total_size(12)  /* OVS_KEY_ATTR_ETHERNET */
 		+ nla_total_size(2)   /* OVS_KEY_ATTR_ETHERTYPE */
 		+ nla_total_size(4)   /* OVS_KEY_ATTR_VLAN */
@@ -405,6 +409,7 @@ static const struct ovs_len_tbl ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = {
 		.len = sizeof(struct ovs_key_ct_tuple_ipv4) },
 	[OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6] = {
 		.len = sizeof(struct ovs_key_ct_tuple_ipv6) },
+	[OVS_KEY_ATTR_NSH]       = { .len = sizeof(struct ovs_key_nsh) },
 };
 
 static bool check_attr_len(unsigned int attr_len, unsigned int expected_len)
@@ -1306,6 +1311,22 @@ static int ovs_key_from_nlattrs(struct net *net, struct sw_flow_match *match,
 		attrs &= ~(1 << OVS_KEY_ATTR_ARP);
 	}
 
+	if (attrs & (1 << OVS_KEY_ATTR_NSH)) {
+		int i;
+		const struct ovs_key_nsh *nsh_key;
+
+		nsh_key = nla_data(a[OVS_KEY_ATTR_NSH]);
+		SW_FLOW_KEY_PUT(match, nsh.flags, nsh_key->flags, is_mask);
+		SW_FLOW_KEY_PUT(match, nsh.mdtype, nsh_key->mdtype, is_mask);
+		SW_FLOW_KEY_PUT(match, nsh.np, nsh_key->np, is_mask);
+		SW_FLOW_KEY_PUT(match, nsh.path_hdr, nsh_key->path_hdr,
+				is_mask);
+		for (i = 0; i < 4; i++)
+			SW_FLOW_KEY_PUT(match, nsh.c[i], nsh_key->c[i],
+					is_mask);
+		attrs &= ~(1 << OVS_KEY_ATTR_NSH);
+	}
+
 	if (attrs & (1 << OVS_KEY_ATTR_MPLS)) {
 		const struct ovs_key_mpls *mpls_key;
 
@@ -1750,6 +1771,21 @@ static int __ovs_nla_put_key(const struct sw_flow_key *swkey,
 		ipv6_key->ipv6_tclass = output->ip.tos;
 		ipv6_key->ipv6_hlimit = output->ip.ttl;
 		ipv6_key->ipv6_frag = output->ip.frag;
+	} else if (swkey->eth.type == htons(ETH_P_NSH)) {
+		int i;
+		struct ovs_key_nsh *nsh_key;
+
+		nla = nla_reserve(skb, OVS_KEY_ATTR_NSH, sizeof(*nsh_key));
+		if (!nla)
+			goto nla_put_failure;
+		nsh_key = nla_data(nla);
+		memset(nsh_key, 0, sizeof(struct ovs_key_nsh));
+		nsh_key->flags = output->nsh.flags;
+		nsh_key->mdtype = output->nsh.mdtype;
+		nsh_key->np = output->nsh.np;
+		nsh_key->path_hdr = output->nsh.path_hdr;
+		for (i = 0; i < 4; i++)
+			nsh_key->c[0] = output->nsh.c[i];
 	} else if (swkey->eth.type == htons(ETH_P_ARP) ||
 		   swkey->eth.type == htons(ETH_P_RARP)) {
 		struct ovs_key_arp *arp_key;
@@ -2384,6 +2420,9 @@ static int validate_set(const struct nlattr *a,
 
 		break;
 
+	case OVS_KEY_ATTR_NSH:
+		break;
+
 	default:
 		return -EINVAL;
 	}
@@ -2482,6 +2521,8 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
 			[OVS_ACTION_ATTR_TRUNC] = sizeof(struct ovs_action_trunc),
 			[OVS_ACTION_ATTR_PUSH_ETH] = sizeof(struct ovs_action_push_eth),
 			[OVS_ACTION_ATTR_POP_ETH] = 0,
+			[OVS_ACTION_ATTR_PUSH_NSH] = (u32)-1,
+			[OVS_ACTION_ATTR_POP_NSH] = 0,
 		};
 		const struct ovs_action_push_vlan *vlan;
 		int type = nla_type(a);
@@ -2636,6 +2677,17 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
 			mac_proto = MAC_PROTO_ETHERNET;
 			break;
 
+		case OVS_ACTION_ATTR_PUSH_NSH:
+			mac_proto = MAC_PROTO_NONE;
+			break;
+
+		case OVS_ACTION_ATTR_POP_NSH:
+			if (key->nsh.np == NSH_P_ETHERNET)
+				mac_proto = MAC_PROTO_ETHERNET;
+			else
+				mac_proto = MAC_PROTO_NONE;
+			break;
+
 		default:
 			OVS_NLERR(log, "Unknown Action type %d", type);
 			return -EINVAL;
-- 
2.5.5

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* Re: [PATCH net-next v2] openvswitch: enable NSH support
       [not found] ` <1502371275-52446-1-git-send-email-yi.y.yang-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
@ 2017-08-11  8:24   ` Jiri Benc
  2017-08-11  8:47     ` Yang, Yi
  2017-08-14 16:09   ` Eric Garver
  1 sibling, 1 reply; 17+ messages in thread
From: Jiri Benc @ 2017-08-11  8:24 UTC (permalink / raw)
  To: Yi Yang; +Cc: dev-yBygre7rU0TnMu66kgdUjQ, netdev-u79uwXL29TY76Z2rM5mHXA

On Thu, 10 Aug 2017 21:21:15 +0800, Yi Yang wrote:
> OVS master and 2.8 branch has merged NSH userspace
> patch series, this patch is to enable NSH support
> in kernel data path in order that OVS can support
> NSH in 2.8 release in compat mode by porting this.

Please include changelog when posting a new version of a patch.

> +static inline u16
> +nsh_hdr_len(const struct nsh_hdr *nsh)

Single line, please. And all other instances of this in nsh.h, too.

> --- a/include/uapi/linux/openvswitch.h
> +++ b/include/uapi/linux/openvswitch.h
> @@ -333,6 +333,7 @@ enum ovs_key_attr {
>  	OVS_KEY_ATTR_CT_LABELS,	/* 16-octet connection tracking label */
>  	OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4,   /* struct ovs_key_ct_tuple_ipv4 */
>  	OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6,   /* struct ovs_key_ct_tuple_ipv6 */
> +	OVS_KEY_ATTR_NSH,                  /* struct ovs_key_nsh */
>  
>  #ifdef __KERNEL__
>  	OVS_KEY_ATTR_TUNNEL_INFO,  /* struct ip_tunnel_info */
> @@ -491,6 +492,15 @@ struct ovs_key_ct_tuple_ipv6 {
>  	__u8   ipv6_proto;
>  };
>  
> +struct ovs_key_nsh {
> +	__u8 flags;
> +	__u8 mdtype;
> +	__u8 np;
> +	__u8 pad;
> +	__be32 path_hdr;
> +	__be32 c[4];

I still don't like the "c" name. Please change it to something
descriptive. This is uAPI and can't be changed later.

And I still don't see my comment about this not being extensible for
MD type 2 addressed. Please understand this is uAPI and it is set in
stone once it is merged into the kernel. It's very important we get
this right since the beginning.

> +struct ovs_action_push_nsh {
> +	__u8 flags;
> +	__u8 mdtype;
> +	__u8 mdlen;
> +	__u8 np;
> +	__be32 path_hdr;
> +	__u8 metadata[];
> +};

This is not how netlink attributes work. Please reread Ben Pfaff's
explanation on how this needs to be structured (Message-ID:
<20170809180912.GU6175-LZ6Gd1LRuIk@public.gmane.org>) and rework the patch. I 100% agree
with what he wrote, his proposal is very clean and matches how netlink
is designed.

> @@ -835,6 +866,8 @@ enum ovs_action_attr {
>  	OVS_ACTION_ATTR_TRUNC,        /* u32 struct ovs_action_trunc. */
>  	OVS_ACTION_ATTR_PUSH_ETH,     /* struct ovs_action_push_eth. */
>  	OVS_ACTION_ATTR_POP_ETH,      /* No argument. */
> +	OVS_ACTION_ATTR_PUSH_NSH,     /* struct ovs_action_push_nsh. */
> +	OVS_ACTION_ATTR_POP_NSH,      /* No argument. */

Thank you for changing this to push/pop, it looks much cleaner now.

> +static int parse_nsh(struct sk_buff *skb, struct sw_flow_key *key)
> +{
> +	struct nsh_hdr *nsh = (struct nsh_hdr *)skb_network_header(skb);
> +	u16 ver_flags_len;
> +	u8 version, length;
> +	u32 path_hdr;
> +	int i;
> +
> +	memset(&key->nsh, 0, sizeof(struct ovs_key_nsh));
> +	ver_flags_len = ntohs(nsh->ver_flags_len);
> +	version = (ver_flags_len & NSH_VER_MASK) >> NSH_VER_SHIFT;
> +	length = (ver_flags_len & NSH_LEN_MASK) >> NSH_LEN_SHIFT;

A nit: the operation to get/set version, length and flags from the NSH
header seems to be repeated enough to warrant helper functions in
include/net/nsh.h. Something like:

static inline u8 nsh_get_version(const struct nsh_hdr *nsh)
{
	return (ntohs(nsh->ver_flags_len) & NSH_VER_MASK) >> NSH_VER_SHIFT;
}

etc.

Not a blocker, though, it may be done later if needed.

> @@ -76,9 +77,11 @@ static bool actions_may_change_flow(const struct nlattr *actions)
>  
>  		case OVS_ACTION_ATTR_CT:
>  		case OVS_ACTION_ATTR_HASH:
> +		case OVS_ACTION_ATTR_POP_NSH:
>  		case OVS_ACTION_ATTR_POP_ETH:
>  		case OVS_ACTION_ATTR_POP_MPLS:
>  		case OVS_ACTION_ATTR_POP_VLAN:
> +		case OVS_ACTION_ATTR_PUSH_NSH:
>  		case OVS_ACTION_ATTR_PUSH_ETH:
>  		case OVS_ACTION_ATTR_PUSH_MPLS:
>  		case OVS_ACTION_ATTR_PUSH_VLAN:

Alphabetical order, please.

Thanks,

 Jiri

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH net-next v2] openvswitch: enable NSH support
  2017-08-11  8:24   ` Jiri Benc
@ 2017-08-11  8:47     ` Yang, Yi
  2017-08-11  9:10       ` Jiri Benc
  0 siblings, 1 reply; 17+ messages in thread
From: Yang, Yi @ 2017-08-11  8:47 UTC (permalink / raw)
  To: Jiri Benc; +Cc: dev-yBygre7rU0TnMu66kgdUjQ, netdev-u79uwXL29TY76Z2rM5mHXA

On Fri, Aug 11, 2017 at 10:24:18AM +0200, Jiri Benc wrote:
> On Thu, 10 Aug 2017 21:21:15 +0800, Yi Yang wrote:
> > OVS master and 2.8 branch has merged NSH userspace
> > patch series, this patch is to enable NSH support
> > in kernel data path in order that OVS can support
> > NSH in 2.8 release in compat mode by porting this.
> 
> Please include changelog when posting a new version of a patch.
> 
> > +static inline u16
> > +nsh_hdr_len(const struct nsh_hdr *nsh)
> 
> Single line, please. And all other instances of this in nsh.h, too.

Thanks Jiri, I'll change these in next version.

> 
> > --- a/include/uapi/linux/openvswitch.h
> > +++ b/include/uapi/linux/openvswitch.h
> > @@ -333,6 +333,7 @@ enum ovs_key_attr {
> >  	OVS_KEY_ATTR_CT_LABELS,	/* 16-octet connection tracking label */
> >  	OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4,   /* struct ovs_key_ct_tuple_ipv4 */
> >  	OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6,   /* struct ovs_key_ct_tuple_ipv6 */
> > +	OVS_KEY_ATTR_NSH,                  /* struct ovs_key_nsh */
> >  
> >  #ifdef __KERNEL__
> >  	OVS_KEY_ATTR_TUNNEL_INFO,  /* struct ip_tunnel_info */
> > @@ -491,6 +492,15 @@ struct ovs_key_ct_tuple_ipv6 {
> >  	__u8   ipv6_proto;
> >  };
> >  
> > +struct ovs_key_nsh {
> > +	__u8 flags;
> > +	__u8 mdtype;
> > +	__u8 np;
> > +	__u8 pad;
> > +	__be32 path_hdr;
> > +	__be32 c[4];
> 
> I still don't like the "c" name. Please change it to something
> descriptive. This is uAPI and can't be changed later.

is "__be32 context[4]" ok?

> 
> And I still don't see my comment about this not being extensible for
> MD type 2 addressed. Please understand this is uAPI and it is set in
> stone once it is merged into the kernel. It's very important we get
> this right since the beginning.

I understood it, but I don't know how I can do this per your comments,

Per Ben's comments,

PUSH_NSH message looks like

PUSH_NSH begin
    nsh base header
    MD type 1
PUSH_NSH end

or 

PUSH_NSH begin
    nsh base header
    MD type 2
PUSH_NSH end

If so, I think we don't need struct ovs_action_push_nsh at all, just as
_CT action did, treat it as a variable data buffer.

So define three new netlink attributes

OVS_ACTION_ATTR_NSH_BASE_HEADER
OVS_ACTION_ATTR_NSH_MD1_DATA
OVS_ACTION_ATTR_NSH_MD2_DATA

OVS_ACTION_ATTR_PUSH_NSH is nested netlink attribute, it will nest
OVS_ACTION_ATTR_NSH_BASE_HEADER and OVS_ACTION_ATTR_NSH_MD1_DATA for MD
type 1, it will nest OVS_ACTION_ATTR_NSH_BASE_HEADER and
OVS_ACTION_ATTR_NSH_MD2_DATA for MD type 2. I'll compeletely remove struct
ovs_action_push_nsh, is it ok?

> 
> > +struct ovs_action_push_nsh {
> > +	__u8 flags;
> > +	__u8 mdtype;
> > +	__u8 mdlen;
> > +	__u8 np;
> > +	__be32 path_hdr;
> > +	__u8 metadata[];
> > +};
> 
> This is not how netlink attributes work. Please reread Ben Pfaff's
> explanation on how this needs to be structured (Message-ID:
> <20170809180912.GU6175-LZ6Gd1LRuIk@public.gmane.org>) and rework the patch. I 100% agree
> with what he wrote, his proposal is very clean and matches how netlink
> is designed.
> 
> > @@ -835,6 +866,8 @@ enum ovs_action_attr {
> >  	OVS_ACTION_ATTR_TRUNC,        /* u32 struct ovs_action_trunc. */
> >  	OVS_ACTION_ATTR_PUSH_ETH,     /* struct ovs_action_push_eth. */
> >  	OVS_ACTION_ATTR_POP_ETH,      /* No argument. */
> > +	OVS_ACTION_ATTR_PUSH_NSH,     /* struct ovs_action_push_nsh. */
> > +	OVS_ACTION_ATTR_POP_NSH,      /* No argument. */
> 
> Thank you for changing this to push/pop, it looks much cleaner now.
> 
> > +static int parse_nsh(struct sk_buff *skb, struct sw_flow_key *key)
> > +{
> > +	struct nsh_hdr *nsh = (struct nsh_hdr *)skb_network_header(skb);
> > +	u16 ver_flags_len;
> > +	u8 version, length;
> > +	u32 path_hdr;
> > +	int i;
> > +
> > +	memset(&key->nsh, 0, sizeof(struct ovs_key_nsh));
> > +	ver_flags_len = ntohs(nsh->ver_flags_len);
> > +	version = (ver_flags_len & NSH_VER_MASK) >> NSH_VER_SHIFT;
> > +	length = (ver_flags_len & NSH_LEN_MASK) >> NSH_LEN_SHIFT;
> 
> A nit: the operation to get/set version, length and flags from the NSH
> header seems to be repeated enough to warrant helper functions in
> include/net/nsh.h. Something like:
> 
> static inline u8 nsh_get_version(const struct nsh_hdr *nsh)
> {
> 	return (ntohs(nsh->ver_flags_len) & NSH_VER_MASK) >> NSH_VER_SHIFT;
> }
> 
> etc.
> 
> Not a blocker, though, it may be done later if needed.

Ok, will change it.

> 
> > @@ -76,9 +77,11 @@ static bool actions_may_change_flow(const struct nlattr *actions)
> >  
> >  		case OVS_ACTION_ATTR_CT:
> >  		case OVS_ACTION_ATTR_HASH:
> > +		case OVS_ACTION_ATTR_POP_NSH:
> >  		case OVS_ACTION_ATTR_POP_ETH:
> >  		case OVS_ACTION_ATTR_POP_MPLS:
> >  		case OVS_ACTION_ATTR_POP_VLAN:
> > +		case OVS_ACTION_ATTR_PUSH_NSH:
> >  		case OVS_ACTION_ATTR_PUSH_ETH:
> >  		case OVS_ACTION_ATTR_PUSH_MPLS:
> >  		case OVS_ACTION_ATTR_PUSH_VLAN:
> 
> Alphabetical order, please.

Ok, will change.
> 
> Thanks,
> 
>  Jiri

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH net-next v2] openvswitch: enable NSH support
  2017-08-11  8:47     ` Yang, Yi
@ 2017-08-11  9:10       ` Jiri Benc
  2017-08-11  9:24         ` Yang, Yi Y
  0 siblings, 1 reply; 17+ messages in thread
From: Jiri Benc @ 2017-08-11  9:10 UTC (permalink / raw)
  To: Yang, Yi; +Cc: netdev, dev

On Fri, 11 Aug 2017 16:47:23 +0800, Yang, Yi wrote:
> is "__be32 context[4]" ok?

Yes, that looks better.

> So define three new netlink attributes
> 
> OVS_ACTION_ATTR_NSH_BASE_HEADER
> OVS_ACTION_ATTR_NSH_MD1_DATA
> OVS_ACTION_ATTR_NSH_MD2_DATA
> 
> OVS_ACTION_ATTR_PUSH_NSH is nested netlink attribute, it will nest
> OVS_ACTION_ATTR_NSH_BASE_HEADER and OVS_ACTION_ATTR_NSH_MD1_DATA for MD
> type 1, it will nest OVS_ACTION_ATTR_NSH_BASE_HEADER and
> OVS_ACTION_ATTR_NSH_MD2_DATA for MD type 2. I'll compeletely remove struct
> ovs_action_push_nsh, is it ok?

Yes, that's the way to do it.

What should be done with struct ovs_key_nsh? Even with "c" renamed to
"context", it's still MD type 1 only structure. What is the plan for
MD type 2 support wrt. this structure?

Thanks,

 Jiri

^ permalink raw reply	[flat|nested] 17+ messages in thread

* RE: [PATCH net-next v2] openvswitch: enable NSH support
  2017-08-11  9:10       ` Jiri Benc
@ 2017-08-11  9:24         ` Yang, Yi Y
  2017-08-11  9:44           ` Jiri Benc
  0 siblings, 1 reply; 17+ messages in thread
From: Yang, Yi Y @ 2017-08-11  9:24 UTC (permalink / raw)
  To: Jiri Benc; +Cc: netdev, dev, Jan Scheurich

So far, we're not clear how we can support MD type 2 better, as I explained before, we need to reuse tun_metadata in struct flow_tnl which is the thing Geneve is using. Geneve predefined 64 keys for this from tun_metadata0 to tun_metadata63, we will reuse it for MD type 2. But you know NSH is not tunnel, so it has to be changed to support both Geneve and NSH. Anyway, they won't be part of ovs_key_nsh.

-----Original Message-----
From: netdev-owner@vger.kernel.org [mailto:netdev-owner@vger.kernel.org] On Behalf Of Jiri Benc
Sent: Friday, August 11, 2017 5:11 PM
To: Yang, Yi Y <yi.y.yang@intel.com>
Cc: netdev@vger.kernel.org; dev@openvswitch.org
Subject: Re: [PATCH net-next v2] openvswitch: enable NSH support

On Fri, 11 Aug 2017 16:47:23 +0800, Yang, Yi wrote:
> is "__be32 context[4]" ok?

Yes, that looks better.

> So define three new netlink attributes
> 
> OVS_ACTION_ATTR_NSH_BASE_HEADER
> OVS_ACTION_ATTR_NSH_MD1_DATA
> OVS_ACTION_ATTR_NSH_MD2_DATA
> 
> OVS_ACTION_ATTR_PUSH_NSH is nested netlink attribute, it will nest 
> OVS_ACTION_ATTR_NSH_BASE_HEADER and OVS_ACTION_ATTR_NSH_MD1_DATA for 
> MD type 1, it will nest OVS_ACTION_ATTR_NSH_BASE_HEADER and 
> OVS_ACTION_ATTR_NSH_MD2_DATA for MD type 2. I'll compeletely remove 
> struct ovs_action_push_nsh, is it ok?

Yes, that's the way to do it.

What should be done with struct ovs_key_nsh? Even with "c" renamed to "context", it's still MD type 1 only structure. What is the plan for MD type 2 support wrt. this structure?

Thanks,

 Jiri

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH net-next v2] openvswitch: enable NSH support
  2017-08-11  9:24         ` Yang, Yi Y
@ 2017-08-11  9:44           ` Jiri Benc
  2017-08-11  9:54             ` Yang, Yi
  2017-08-11 10:09             ` Jan Scheurich
  0 siblings, 2 replies; 17+ messages in thread
From: Jiri Benc @ 2017-08-11  9:44 UTC (permalink / raw)
  To: Yang, Yi Y; +Cc: netdev, dev, Jan Scheurich

On Fri, 11 Aug 2017 09:24:25 +0000, Yang, Yi Y wrote:
> So far, we're not clear how we can support MD type 2 better, as I
> explained before, we need to reuse tun_metadata in struct flow_tnl
> which is the thing Geneve is using. Geneve predefined 64 keys for
> this from tun_metadata0 to tun_metadata63, we will reuse it for MD
> type 2. But you know NSH is not tunnel, so it has to be changed to
> support both Geneve and NSH. Anyway, they won't be part of
> ovs_key_nsh.

Please do not top post.

The context field does not apply to MD type 2. It looks wrong for the
context field to be included in netlink attribute for anything other
than MD type 1. Perhaps it needs to be put into a separate attribute,
too?

Note that I'm talking only about the uAPI. Internally, ovs can use
struct ovs_key_nsh that is MD type 1 only, there's no problem changing
that later. But for the user space interface, this needs to be clean.
This can be solved for example this way:

In include/uapi/linux/openvswitch.h:

struct ovs_key_nsh_base {
	__u8 flags;
	__u8 mdtype;
	__u8 np;
	__u8 pad;
	__be32 path_hdr;
};

+ one more netlink attribute carrying MD type 1 info. Will probably
require to change OVS_KEY_ATTR_NSH to a nested attribute etc.

In net/openvswitch/flow.h (or perhaps a different header would be more
appropriate?):

struct ovs_key_nsh {
	struct ovs_key_nsh_base base;
	__be32 context[4];
};

Plus needed conversions between OVS_KEY_ATTR_NSH and struct ovs_key_nsh
when interfacing between the kernel and user space.

That way, we can have MD type 1 support only for now while still being
allowed to redesign things in whatever way later.

 Jiri

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH net-next v2] openvswitch: enable NSH support
  2017-08-11  9:44           ` Jiri Benc
@ 2017-08-11  9:54             ` Yang, Yi
  2017-08-11 10:09             ` Jan Scheurich
  1 sibling, 0 replies; 17+ messages in thread
From: Yang, Yi @ 2017-08-11  9:54 UTC (permalink / raw)
  To: Jiri Benc; +Cc: netdev, dev

On Fri, Aug 11, 2017 at 11:44:49AM +0200, Jiri Benc wrote:
> On Fri, 11 Aug 2017 09:24:25 +0000, Yang, Yi Y wrote:
> > So far, we're not clear how we can support MD type 2 better, as I
> > explained before, we need to reuse tun_metadata in struct flow_tnl
> > which is the thing Geneve is using. Geneve predefined 64 keys for
> > this from tun_metadata0 to tun_metadata63, we will reuse it for MD
> > type 2. But you know NSH is not tunnel, so it has to be changed to
> > support both Geneve and NSH. Anyway, they won't be part of
> > ovs_key_nsh.
> 
> Please do not top post.

Sorry for this inconvenience, I'm using outlook. But now I have to use
mutt to address your concern :-)

> 
> The context field does not apply to MD type 2. It looks wrong for the
> context field to be included in netlink attribute for anything other
> than MD type 1. Perhaps it needs to be put into a separate attribute,
> too?
> 
> Note that I'm talking only about the uAPI. Internally, ovs can use
> struct ovs_key_nsh that is MD type 1 only, there's no problem changing
> that later. But for the user space interface, this needs to be clean.
> This can be solved for example this way:
> 
> In include/uapi/linux/openvswitch.h:
> 
> struct ovs_key_nsh_base {
> 	__u8 flags;
> 	__u8 mdtype;
> 	__u8 np;
> 	__u8 pad;
> 	__be32 path_hdr;
> };
> 
> + one more netlink attribute carrying MD type 1 info. Will probably
> require to change OVS_KEY_ATTR_NSH to a nested attribute etc.
> 
> In net/openvswitch/flow.h (or perhaps a different header would be more
> appropriate?):
> 
> struct ovs_key_nsh {
> 	struct ovs_key_nsh_base base;
> 	__be32 context[4];
> };
> 
> Plus needed conversions between OVS_KEY_ATTR_NSH and struct ovs_key_nsh
> when interfacing between the kernel and user space.
> 
> That way, we can have MD type 1 support only for now while still being
> allowed to redesign things in whatever way later.

Yeah, good suggestion, will try to do that way.

> 
>  Jiri

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH net-next v2] openvswitch: enable NSH support
  2017-08-11  9:44           ` Jiri Benc
  2017-08-11  9:54             ` Yang, Yi
@ 2017-08-11 10:09             ` Jan Scheurich
       [not found]               ` <CFF8EF42F1132E4CBE2BF0AB6C21C58D7273679A-hqolJogE5njKJFWPz4pdheaU1rCVNFv4@public.gmane.org>
  2017-08-11 10:35               ` Jiri Pirko
  1 sibling, 2 replies; 17+ messages in thread
From: Jan Scheurich @ 2017-08-11 10:09 UTC (permalink / raw)
  To: Jiri Benc, Yang, Yi Y
  Cc: dev-yBygre7rU0TnMu66kgdUjQ, netdev-u79uwXL29TY76Z2rM5mHXA

> -----Original Message-----
> From: Jiri Benc [mailto:jbenc-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org]
> Sent: Friday, 11 August, 2017 11:45
> 
> The context field does not apply to MD type 2. It looks wrong for the
> context field to be included in netlink attribute for anything other
> than MD type 1. Perhaps it needs to be put into a separate attribute,
> too?
> 
> Note that I'm talking only about the uAPI. Internally, ovs can use
> struct ovs_key_nsh that is MD type 1 only, there's no problem changing
> that later. But for the user space interface, this needs to be clean.
> This can be solved for example this way:
> 
> In include/uapi/linux/openvswitch.h:
> 
> struct ovs_key_nsh_base {
> 	__u8 flags;
> 	__u8 mdtype;
> 	__u8 np;
> 	__u8 pad;
> 	__be32 path_hdr;
> };
> 
> + one more netlink attribute carrying MD type 1 info. Will probably
> require to change OVS_KEY_ATTR_NSH to a nested attribute etc.
> 
> In net/openvswitch/flow.h (or perhaps a different header would be more
> appropriate?):
> 
> struct ovs_key_nsh {
> 	struct ovs_key_nsh_base base;
> 	__be32 context[4];
> };
> 
> Plus needed conversions between OVS_KEY_ATTR_NSH and struct ovs_key_nsh
> when interfacing between the kernel and user space.
> 
> That way, we can have MD type 1 support only for now while still being
> allowed to redesign things in whatever way later.
> 
>  Jiri

For the OVS_KEY_ATTR_NSH I agree to move the fixed size MD1 context headers from nsh_base to a separate struct and use nested netlink attributes.

For OVS_ACTION_ATTR_PUSH_NSH attribute any metadata included is opaque to the datapath and should be copied as is into the packet header. I doubt that there is any benefit to model this with nested attributes for MD1 or MD2. This just adds complexity on sender and receiver side and requires updates in case there should be other MD types added later on.

Unless someone can explain to me why the datapath should understand the internal structure/format of metadata in push_nsh, I would strongly vote to keep the metadata as variable length octet sequence in the non-structured OVS_ACTION_ATTR_PUSH_NSH

BR, Jan

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH net-next v2] openvswitch: enable NSH support
       [not found]               ` <CFF8EF42F1132E4CBE2BF0AB6C21C58D7273679A-hqolJogE5njKJFWPz4pdheaU1rCVNFv4@public.gmane.org>
@ 2017-08-11 10:22                 ` Jiri Benc
  2017-08-13 21:13                   ` Jan Scheurich
  0 siblings, 1 reply; 17+ messages in thread
From: Jiri Benc @ 2017-08-11 10:22 UTC (permalink / raw)
  To: Jan Scheurich; +Cc: netdev-u79uwXL29TY76Z2rM5mHXA, dev-yBygre7rU0TnMu66kgdUjQ

On Fri, 11 Aug 2017 10:09:36 +0000, Jan Scheurich wrote:
> Unless someone can explain to me why the datapath should understand the
> internal structure/format of metadata in push_nsh, I would strongly
> vote to keep the metadata as variable length octet sequence in the
> non-structured OVS_ACTION_ATTR_PUSH_NSH

Could be but it still needs to be in a different attribute and not in
the ovs_action_push_nsh structure.

Separate attributes for MD1/MD2 has the advantage of easier validation:
with a separate MD1 type attribute, the size check is easier. With an
unstructured MD attribute, we'd need to look into the
OVS_ACTION_ATTR_NSH_BASE_HEADER attribute for mdtype and then validate
the unstructured MD attribute size manually. Not a big deal, though.
I don't have strong opinion here.

But I do have strong opinion that MD needs to go into a separate
attribute, whether there are separate attributes for MD1/2 or not.

 Jiri

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH net-next v2] openvswitch: enable NSH support
  2017-08-11 10:09             ` Jan Scheurich
       [not found]               ` <CFF8EF42F1132E4CBE2BF0AB6C21C58D7273679A-hqolJogE5njKJFWPz4pdheaU1rCVNFv4@public.gmane.org>
@ 2017-08-11 10:35               ` Jiri Pirko
  1 sibling, 0 replies; 17+ messages in thread
From: Jiri Pirko @ 2017-08-11 10:35 UTC (permalink / raw)
  To: Jan Scheurich; +Cc: Jiri Benc, Yang, Yi Y, netdev, dev

Fri, Aug 11, 2017 at 12:09:36PM CEST, jan.scheurich@ericsson.com wrote:
>> -----Original Message-----
>> From: Jiri Benc [mailto:jbenc@redhat.com]
>> Sent: Friday, 11 August, 2017 11:45
>> 
>> The context field does not apply to MD type 2. It looks wrong for the
>> context field to be included in netlink attribute for anything other
>> than MD type 1. Perhaps it needs to be put into a separate attribute,
>> too?
>> 
>> Note that I'm talking only about the uAPI. Internally, ovs can use
>> struct ovs_key_nsh that is MD type 1 only, there's no problem changing
>> that later. But for the user space interface, this needs to be clean.
>> This can be solved for example this way:
>> 
>> In include/uapi/linux/openvswitch.h:
>> 
>> struct ovs_key_nsh_base {
>> 	__u8 flags;
>> 	__u8 mdtype;
>> 	__u8 np;
>> 	__u8 pad;
>> 	__be32 path_hdr;
>> };
>> 
>> + one more netlink attribute carrying MD type 1 info. Will probably
>> require to change OVS_KEY_ATTR_NSH to a nested attribute etc.
>> 
>> In net/openvswitch/flow.h (or perhaps a different header would be more
>> appropriate?):
>> 
>> struct ovs_key_nsh {
>> 	struct ovs_key_nsh_base base;
>> 	__be32 context[4];
>> };
>> 
>> Plus needed conversions between OVS_KEY_ATTR_NSH and struct ovs_key_nsh
>> when interfacing between the kernel and user space.
>> 
>> That way, we can have MD type 1 support only for now while still being
>> allowed to redesign things in whatever way later.
>> 
>>  Jiri
>
>For the OVS_KEY_ATTR_NSH I agree to move the fixed size MD1 context headers from nsh_base to a separate struct and use nested netlink attributes.
>
>For OVS_ACTION_ATTR_PUSH_NSH attribute any metadata included is opaque to the datapath and should be copied as is into the packet header. I doubt that there is any benefit to model this with nested attributes for MD1 or MD2. This just adds complexity on sender and receiver side and requires updates in case there should be other MD types added later on.
>
>Unless someone can explain to me why the datapath should understand the internal structure/format of metadata in push_nsh, I would strongly vote to keep the metadata as variable length octet sequence in the non-structured OVS_ACTION_ATTR_PUSH_NSH

Could you please wrap lines at 72 chars? This is unreadable. Thanks!

^ permalink raw reply	[flat|nested] 17+ messages in thread

* RE: [PATCH net-next v2] openvswitch: enable NSH support
  2017-08-11 10:22                 ` Jiri Benc
@ 2017-08-13 21:13                   ` Jan Scheurich
       [not found]                     ` <CFF8EF42F1132E4CBE2BF0AB6C21C58D72736EAE-hqolJogE5njKJFWPz4pdheaU1rCVNFv4@public.gmane.org>
  0 siblings, 1 reply; 17+ messages in thread
From: Jan Scheurich @ 2017-08-13 21:13 UTC (permalink / raw)
  To: Jiri Benc; +Cc: Yang, Yi Y, netdev, dev

> From: Jiri Benc [mailto:jbenc@redhat.com]
> Sent: Friday, 11 August, 2017 12:23
> 
> On Fri, 11 Aug 2017 10:09:36 +0000, Jan Scheurich wrote:
> > Unless someone can explain to me why the datapath should understand the
> > internal structure/format of metadata in push_nsh, I would strongly
> > vote to keep the metadata as variable length octet sequence in the
> > non-structured OVS_ACTION_ATTR_PUSH_NSH
> 
> Could be but it still needs to be in a different attribute and not in
> the ovs_action_push_nsh structure.
> 
> Separate attributes for MD1/MD2 has the advantage of easier validation:
> with a separate MD1 type attribute, the size check is easier. With an
> unstructured MD attribute, we'd need to look into the
> OVS_ACTION_ATTR_NSH_BASE_HEADER attribute for mdtype and then validate
> the unstructured MD attribute size manually. Not a big deal, though.
> I don't have strong opinion here.
> 
> But I do have strong opinion that MD needs to go into a separate
> attribute, whether there are separate attributes for MD1/2 or not.

Jiri, I am not too familiar with conventions on the OVS netlink interface regarding the handling of variable length fields. What is the benefit of structuring the push_nsh action into

OVS_ACTION_ATTR_PUSH_NSH
+-- OVS_ACTION_ATTR_NSH_BASE_HEADER
+-- OVS_ACTION_ATTR_NSH_MD

instead of grouping the base header fields and the variable length MD into a single monolithic attribute OVS_ACTION_ATTR_PUSH_NSH? Is the main concern a potential future need to add additional parameters to the push_nsh datapath action? Are there examples for such structured actions other than OVS_ACTION_ATTR_CT where the need is obvious because it is very polymorphic?

BR, Jan

BTW:  The name OVS_ACTION_ATTR_NSH_BASE_HEADER is misleading because in the NSH draft the term base header is used for the first 32-bit word, whereas here it includes also the 32-bit Service Path header.

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH net-next v2] openvswitch: enable NSH support
       [not found]                     ` <CFF8EF42F1132E4CBE2BF0AB6C21C58D72736EAE-hqolJogE5njKJFWPz4pdheaU1rCVNFv4@public.gmane.org>
@ 2017-08-14  7:51                       ` Jiri Benc
  2017-08-14 10:35                         ` Jan Scheurich
  0 siblings, 1 reply; 17+ messages in thread
From: Jiri Benc @ 2017-08-14  7:51 UTC (permalink / raw)
  To: Jan Scheurich; +Cc: netdev-u79uwXL29TY76Z2rM5mHXA, dev-yBygre7rU0TnMu66kgdUjQ

On Sun, 13 Aug 2017 21:13:57 +0000, Jan Scheurich wrote:
> Jiri, I am not too familiar with conventions on the OVS netlink
> interface regarding the handling of variable length fields. What is
> the benefit of structuring the push_nsh action into
> 
> OVS_ACTION_ATTR_PUSH_NSH
> +-- OVS_ACTION_ATTR_NSH_BASE_HEADER
> +-- OVS_ACTION_ATTR_NSH_MD
> 
> instead of grouping the base header fields and the variable length MD
> into a single monolithic attribute OVS_ACTION_ATTR_PUSH_NSH? Is the
> main concern a potential future need to add additional parameters to
> the push_nsh datapath action? Are there examples for such structured
> actions other than OVS_ACTION_ATTR_CT where the need is obvious
> because it is very polymorphic?

This is about having the design clean. What would you do if you had two
variable length fields? Would you still put them in a single structure
and have a length field in the structure, too? That would be wrong, we
have length in the attribute header. I doubt you would do that. Which
indicates that putting variable length fields to an attribute with
anything other is wrong.

Also, look at how ugly the code would be. You'd have to subtract the
base header length from the attribute length to get the variable data
length. That's not nice at all.

Think about the netlink in the way that by default, every field should
have its own attribute. Structures are included only for performance
reasons where certain fields are always passed in a message and having
them in separate attributes would be impractical and waste of space.
Going out of your way to include the context data in the structure thus
doesn't make sense.

> BTW:  The name OVS_ACTION_ATTR_NSH_BASE_HEADER is misleading because
> in the NSH draft the term base header is used for the first 32-bit
> word, whereas here it includes also the 32-bit Service Path header.

An excellent comment. This means that it's very well possible that
future NSH versions may not include SP header or may have it of a
different size. Maybe we should consider putting it into a separate
attribute, too? Not sure it is needed, though, I don't think it's
likely to happen.

 Jiri

^ permalink raw reply	[flat|nested] 17+ messages in thread

* RE: [PATCH net-next v2] openvswitch: enable NSH support
  2017-08-14  7:51                       ` Jiri Benc
@ 2017-08-14 10:35                         ` Jan Scheurich
       [not found]                           ` <CFF8EF42F1132E4CBE2BF0AB6C21C58D727373EA-hqolJogE5njKJFWPz4pdheaU1rCVNFv4@public.gmane.org>
  0 siblings, 1 reply; 17+ messages in thread
From: Jan Scheurich @ 2017-08-14 10:35 UTC (permalink / raw)
  To: Jiri Benc; +Cc: Yang, Yi Y, netdev, dev

> From: Jiri Benc [mailto:jbenc@redhat.com]
> Sent: Monday, 14 August, 2017 09:51
> 
> On Sun, 13 Aug 2017 21:13:57 +0000, Jan Scheurich wrote:
> > Jiri, I am not too familiar with conventions on the OVS netlink
> > interface regarding the handling of variable length fields. What is
> > the benefit of structuring the push_nsh action into
> >
> > OVS_ACTION_ATTR_PUSH_NSH
> > +-- OVS_ACTION_ATTR_NSH_BASE_HEADER
> > +-- OVS_ACTION_ATTR_NSH_MD
> >
> > instead of grouping the base header fields and the variable length MD
> > into a single monolithic attribute OVS_ACTION_ATTR_PUSH_NSH? Is the
> > main concern a potential future need to add additional parameters to
> > the push_nsh datapath action? Are there examples for such structured
> > actions other than OVS_ACTION_ATTR_CT where the need is obvious
> > because it is very polymorphic?
> 
> This is about having the design clean. What would you do if you had two
> variable length fields? Would you still put them in a single structure
> and have a length field in the structure, too? That would be wrong, we
> have length in the attribute header. I doubt you would do that. Which
> indicates that putting variable length fields to an attribute with
> anything other is wrong.
> 
> Also, look at how ugly the code would be. You'd have to subtract the
> base header length from the attribute length to get the variable data
> length. That's not nice at all.
> 
> Think about the netlink in the way that by default, every field should
> have its own attribute. Structures are included only for performance
> reasons where certain fields are always passed in a message and having
> them in separate attributes would be impractical and waste of space.
> Going out of your way to include the context data in the structure thus
> doesn't make sense.
> 
> > BTW:  The name OVS_ACTION_ATTR_NSH_BASE_HEADER is misleading
> because
> > in the NSH draft the term base header is used for the first 32-bit
> > word, whereas here it includes also the 32-bit Service Path header.
> 
> An excellent comment. This means that it's very well possible that
> future NSH versions may not include SP header or may have it of a
> different size. Maybe we should consider putting it into a separate
> attribute, too? Not sure it is needed, though, I don't think it's
> likely to happen.

I think the NSH RFC draft is pretty clear that the NSH header (Version 0) will always contain the Base Header, the Service Header and a (possibly empty) set of Context Headers. The length of the context headers is implied by the total NSH header Length in the Base Header. The internal structure of the context headers, implied by the "MD type" in the base header, is irrelevant for the push_nsh action as it is managed by the ofproto-dpif layer. The current NSH header format simply does not allow for a second variable length section in the header.

Is it worth to speculate on how a hypothetical future NSH version (with a different Version value in the Base header) might look like? If this should ever arise, we could introduce a new push_nsh_v2 action. 

In summary, I'm still not convinced that we gain any significant generality by splitting up the OVS_ACTION_ATTR_PUSH_NSH action into nested attributes. It will only slow down the execution of the action in the datapath (or requires some optimization in the datapath to merge the attributes into a single internal action struct).

Note: For the OVS_KEY_ATTR_NSH we should split up in to OVS_KEY_ATTR_NSH_FIXED_HEADER (covering Base and Service headers) and OVS_KEY_ATTR_NSH_MD1.

Jan

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH net-next v2] openvswitch: enable NSH support
       [not found]                           ` <CFF8EF42F1132E4CBE2BF0AB6C21C58D727373EA-hqolJogE5njKJFWPz4pdheaU1rCVNFv4@public.gmane.org>
@ 2017-08-14 10:47                             ` Jiri Benc
  2017-08-14 11:08                               ` Jan Scheurich
  0 siblings, 1 reply; 17+ messages in thread
From: Jiri Benc @ 2017-08-14 10:47 UTC (permalink / raw)
  To: Jan Scheurich; +Cc: netdev-u79uwXL29TY76Z2rM5mHXA, dev-yBygre7rU0TnMu66kgdUjQ

On Mon, 14 Aug 2017 10:35:42 +0000, Jan Scheurich wrote:
> Is it worth to speculate on how a hypothetical future NSH version
> (with a different Version value in the Base header) might look like?

Absolutely. This is uAPI we're talking about and once merged, it's set
in stone. Whatever we come up with should allow future extensibility.

> If this should ever arise, we could introduce a new push_nsh_v2
> action.

Which would mean we failed with the design. We would have to maintain
two parallel APIs forever. This is not how the design should be made.

 Jiri

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH net-next v2] openvswitch: enable NSH support
  2017-08-14 10:47                             ` Jiri Benc
@ 2017-08-14 11:08                               ` Jan Scheurich
  0 siblings, 0 replies; 17+ messages in thread
From: Jan Scheurich @ 2017-08-14 11:08 UTC (permalink / raw)
  To: Jiri Benc; +Cc: netdev-u79uwXL29TY76Z2rM5mHXA, dev-yBygre7rU0TnMu66kgdUjQ

> From: Jiri Benc [mailto:jbenc-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org]
> Sent: Monday, 14 August, 2017 12:48
> 
> On Mon, 14 Aug 2017 10:35:42 +0000, Jan Scheurich wrote:
> > Is it worth to speculate on how a hypothetical future NSH version
> > (with a different Version value in the Base header) might look like?
> 
> Absolutely. This is uAPI we're talking about and once merged, it's set
> in stone. Whatever we come up with should allow future extensibility.
> 
> > If this should ever arise, we could introduce a new push_nsh_v2
> > action.
> 
> Which would mean we failed with the design. We would have to maintain
> two parallel APIs forever. This is not how the design should be made.

Well, if that is the ultimate design goal, we'll have no choice.

But if the hypothetic next NSH version looks entirely different, we will have to manage the incompatibility anyhow on the level of the nested attributes. So the only thing we will for sure manage to keep fixed might be the empty wrapper OVS_ACTION_ATTR_PUSH_NSH...

We decided earlier on to go for dedicated push/pop_<Header> actions in the datapath instead of a single pair of (very polymorphic) generic encap/decap datapath actions to make the implementation in the datapath more explicit and straightforward. Why not follow the same philosophy also when we need to support push/pop for incompatible versions of the same protocol?

Jan

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH net-next v2] openvswitch: enable NSH support
       [not found] ` <1502371275-52446-1-git-send-email-yi.y.yang-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
  2017-08-11  8:24   ` Jiri Benc
@ 2017-08-14 16:09   ` Eric Garver
  2017-08-15  0:39     ` Yang, Yi
  1 sibling, 1 reply; 17+ messages in thread
From: Eric Garver @ 2017-08-14 16:09 UTC (permalink / raw)
  To: Yi Yang
  Cc: dev-yBygre7rU0TnMu66kgdUjQ, netdev-u79uwXL29TY76Z2rM5mHXA,
	jbenc-H+wXaHxf7aLQT0dZR+AlfA

On Thu, Aug 10, 2017 at 09:21:15PM +0800, Yi Yang wrote:
> OVS master and 2.8 branch has merged NSH userspace
> patch series, this patch is to enable NSH support
> in kernel data path in order that OVS can support
> NSH in 2.8 release in compat mode by porting this.
> 
> Signed-off-by: Yi Yang <yi.y.yang-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
> ---
>  drivers/net/vxlan.c              |   7 ++
>  include/net/nsh.h                | 126 ++++++++++++++++++++++++++++++
>  include/uapi/linux/openvswitch.h |  33 ++++++++
>  net/openvswitch/actions.c        | 165 +++++++++++++++++++++++++++++++++++++++
>  net/openvswitch/flow.c           |  41 ++++++++++
>  net/openvswitch/flow.h           |   1 +
>  net/openvswitch/flow_netlink.c   |  54 ++++++++++++-
>  7 files changed, 426 insertions(+), 1 deletion(-)
>  create mode 100644 include/net/nsh.h

Hi Yi,

In general I'd like to echo Jiri's comments on the netlink attributes.
I'd like to see the metadata separate.

I have a few other comments below.

Thanks.
Eric.

[..]
> diff --git a/include/net/nsh.h b/include/net/nsh.h
> new file mode 100644
> index 0000000..96477a1
> --- /dev/null
> +++ b/include/net/nsh.h
> @@ -0,0 +1,126 @@
> +#ifndef __NET_NSH_H
> +#define __NET_NSH_H 1
> +
> +
> +/*
> + * Network Service Header:
> + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
> + * |Ver|O|C|R|R|R|R|R|R|    Length   |   MD Type   |  Next Proto   |
> + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
> + * |                Service Path ID                | Service Index |
> + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
> + * |                                                               |
> + * ~               Mandatory/Optional Context Header               ~
> + * |                                                               |
> + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
> + * Ver = The version field is used to ensure backward compatibility
> + *       going forward with future NSH updates.  It MUST be set to 0x0
> + *       by the sender, in this first revision of NSH.
> + *
> + * O = OAM. when set to 0x1 indicates that this packet is an operations
> + *     and management (OAM) packet.  The receiving SFF and SFs nodes
> + *     MUST examine the payload and take appropriate action.
> + *
> + * C = context. Indicates that a critical metadata TLV is present.
> + *
> + * Length : total length, in 4-byte words, of NSH including the Base
> + *          Header, the Service Path Header and the optional variable
> + *          TLVs.
> + * MD Type: indicates the format of NSH beyond the mandatory Base Header
> + *          and the Service Path Header.
> + *
> + * Next Protocol: indicates the protocol type of the original packet. A
> + *          new IANA registry will be created for protocol type.
> + *
> + * Service Path Identifier (SPI): identifies a service path.
> + *          Participating nodes MUST use this identifier for Service
> + *          Function Path selection.
> + *
> + * Service Index (SI): provides location within the SFP.
> + *
> + * [0] https://tools.ietf.org/html/draft-ietf-sfc-nsh-13
> + */
> +
> +/**
> + * struct nsh_md1_ctx - Keeps track of NSH context data
> + * @nshc<1-4>: NSH Contexts.
> + */
> +struct nsh_md1_ctx {
> +	__be32 c[4];
> +};
> +
> +struct nsh_md2_tlv {
> +	__be16 md_class;
> +	u8 type;
> +	u8 length;
> +	u8 md_value[];
> +};
> +
> +struct nsh_hdr {
> +	__be16 ver_flags_len;
> +	u8 md_type;
> +	u8 next_proto;
> +	__be32 path_hdr;
> +	union {
> +	    struct nsh_md1_ctx md1;
> +	    struct nsh_md2_tlv md2[0];
> +	};
> +};
> +
> +/* Masking NSH header fields. */
> +#define NSH_VER_MASK       0xc000
> +#define NSH_VER_SHIFT      14
> +#define NSH_FLAGS_MASK     0x3fc0
> +#define NSH_FLAGS_SHIFT    6
> +#define NSH_LEN_MASK       0x003f
> +#define NSH_LEN_SHIFT      0
> +
> +#define NSH_SPI_MASK       0xffffff00
> +#define NSH_SPI_SHIFT      8
> +#define NSH_SI_MASK        0x000000ff
> +#define NSH_SI_SHIFT       0
> +
> +#define NSH_DST_PORT    4790     /* UDP Port for NSH on VXLAN. */
> +#define ETH_P_NSH       0x894F   /* Ethertype for NSH. */
> +
> +/* NSH Base Header Next Protocol. */
> +#define NSH_P_IPV4        0x01
> +#define NSH_P_IPV6        0x02
> +#define NSH_P_ETHERNET    0x03
> +#define NSH_P_NSH         0x04
> +#define NSH_P_MPLS        0x05
> +
> +/* MD Type Registry. */
> +#define NSH_M_TYPE1     0x01
> +#define NSH_M_TYPE2     0x02
> +#define NSH_M_EXP1      0xFE
> +#define NSH_M_EXP2      0xFF
> +
> +/* NSH Metadata Length. */
> +#define NSH_M_TYPE1_MDLEN 16
> +
> +/* NSH Base Header Length */
> +#define NSH_BASE_HDR_LEN  8
> +
> +/* NSH MD Type 1 header Length. */
> +#define NSH_M_TYPE1_LEN   24
> +
> +static inline u16
> +nsh_hdr_len(const struct nsh_hdr *nsh)
> +{
> +	return 4 * (ntohs(nsh->ver_flags_len) & NSH_LEN_MASK) >> NSH_LEN_SHIFT;

This is doing the multiplication before the shift. It works only because
the shift is 0.

> +}
> +
> +static inline struct nsh_md1_ctx *
> +nsh_md1_ctx(struct nsh_hdr *nsh)
> +{
> +	return &nsh->md1;
> +}
> +
> +static inline struct nsh_md2_tlv *
> +nsh_md2_ctx(struct nsh_hdr *nsh)
> +{
> +	return nsh->md2;
> +}
> +
> +#endif /* __NET_NSH_H */
> diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h
> index 156ee4c..5a1c94b 100644
> --- a/include/uapi/linux/openvswitch.h
> +++ b/include/uapi/linux/openvswitch.h
> @@ -333,6 +333,7 @@ enum ovs_key_attr {
>  	OVS_KEY_ATTR_CT_LABELS,	/* 16-octet connection tracking label */
>  	OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4,   /* struct ovs_key_ct_tuple_ipv4 */
>  	OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6,   /* struct ovs_key_ct_tuple_ipv6 */
> +	OVS_KEY_ATTR_NSH,                  /* struct ovs_key_nsh */
>  
>  #ifdef __KERNEL__
>  	OVS_KEY_ATTR_TUNNEL_INFO,  /* struct ip_tunnel_info */
> @@ -491,6 +492,15 @@ struct ovs_key_ct_tuple_ipv6 {
>  	__u8   ipv6_proto;
>  };
>  
> +struct ovs_key_nsh {
> +	__u8 flags;
> +	__u8 mdtype;
> +	__u8 np;
> +	__u8 pad;
> +	__be32 path_hdr;
> +	__be32 c[4];
> +};
> +
>  /**
>   * enum ovs_flow_attr - attributes for %OVS_FLOW_* commands.
>   * @OVS_FLOW_ATTR_KEY: Nested %OVS_KEY_ATTR_* attributes specifying the flow
> @@ -769,6 +779,25 @@ struct ovs_action_push_eth {
>  	struct ovs_key_ethernet addresses;
>  };
>  
> +#define OVS_PUSH_NSH_MAX_MD_LEN 248
> +/*
> + * struct ovs_action_push_nsh - %OVS_ACTION_ATTR_PUSH_NSH
> + * @flags: NSH header flags.
> + * @mdtype: NSH metadata type.
> + * @mdlen: Length of NSH metadata in bytes.
> + * @np: NSH next_protocol: Inner packet type.
> + * @path_hdr: NSH service path id and service index.
> + * @metadata: NSH metadata for MD type 1 or 2
> + */
> +struct ovs_action_push_nsh {
> +	__u8 flags;
> +	__u8 mdtype;
> +	__u8 mdlen;
> +	__u8 np;
> +	__be32 path_hdr;
> +	__u8 metadata[];
> +};
> +
>  /**
>   * enum ovs_action_attr - Action types.
>   *
> @@ -806,6 +835,8 @@ struct ovs_action_push_eth {
>   * packet.
>   * @OVS_ACTION_ATTR_POP_ETH: Pop the outermost Ethernet header off the
>   * packet.
> + * @OVS_ACTION_ATTR_PUSH_NSH: push NSH header to the packet.
> + * @OVS_ACTION_ATTR_POP_NSH: pop the outermost NSH header off the packet.
>   *
>   * Only a single header can be set with a single %OVS_ACTION_ATTR_SET.  Not all
>   * fields within a header are modifiable, e.g. the IPv4 protocol and fragment
> @@ -835,6 +866,8 @@ enum ovs_action_attr {
>  	OVS_ACTION_ATTR_TRUNC,        /* u32 struct ovs_action_trunc. */
>  	OVS_ACTION_ATTR_PUSH_ETH,     /* struct ovs_action_push_eth. */
>  	OVS_ACTION_ATTR_POP_ETH,      /* No argument. */
> +	OVS_ACTION_ATTR_PUSH_NSH,     /* struct ovs_action_push_nsh. */
> +	OVS_ACTION_ATTR_POP_NSH,      /* No argument. */
>  
>  	__OVS_ACTION_ATTR_MAX,	      /* Nothing past this will be accepted
>  				       * from userspace. */
> diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
> index e461067..50f80bc 100644
> --- a/net/openvswitch/actions.c
> +++ b/net/openvswitch/actions.c
> @@ -38,6 +38,7 @@
>  #include <net/dsfield.h>
>  #include <net/mpls.h>
>  #include <net/sctp/checksum.h>
> +#include <net/nsh.h>
>  
>  #include "datapath.h"
>  #include "flow.h"
> @@ -380,6 +381,114 @@ static int push_eth(struct sk_buff *skb, struct sw_flow_key *key,
>  	return 0;
>  }
>  
> +static int push_nsh(struct sk_buff *skb, struct sw_flow_key *key,
> +		    const struct ovs_action_push_nsh *oapn)
> +{
> +	struct nsh_hdr *nsh;
> +	size_t length = NSH_BASE_HDR_LEN + oapn->mdlen;
> +	u8 next_proto;
> +
> +	if (key->mac_proto == MAC_PROTO_ETHERNET) {
> +		next_proto = NSH_P_ETHERNET;
> +	} else {
> +		switch (ntohs(skb->protocol)) {
> +		case ETH_P_IP:
> +			next_proto = NSH_P_IPV4;
> +			break;
> +		case ETH_P_IPV6:
> +			next_proto = NSH_P_IPV6;
> +			break;
> +		case ETH_P_NSH:
> +			next_proto = NSH_P_NSH;
> +			break;
> +		default:
> +			return -ENOTSUPP;
> +		}
> +	}
> +
>

I believe you need to validate that oapn->mdlen is a multiple of 4.

> +	/* Add the NSH header */
> +	if (skb_cow_head(skb, length) < 0)
> +		return -ENOMEM;
> +
> +	skb_push(skb, length);
> +	nsh = (struct nsh_hdr *)(skb->data);
> +	nsh->ver_flags_len = htons((oapn->flags << NSH_FLAGS_SHIFT) |
> +				 (length >> 2));
> +	nsh->next_proto = next_proto;
> +	nsh->path_hdr = oapn->path_hdr;
> +	nsh->md_type = oapn->mdtype;
> +	switch (nsh->md_type) {
> +	case NSH_M_TYPE1:
> +		nsh->md1 = *(struct nsh_md1_ctx *)oapn->metadata;
> +		break;
> +	case NSH_M_TYPE2: {
> +		/* The MD2 metadata in oapn is already padded to 4 bytes. */
> +		size_t len = DIV_ROUND_UP(oapn->mdlen, 4) * 4;
> +
> +		memcpy(nsh->md2, oapn->metadata, len);

I don't see any validation of oapn->mdlen. Normally this happens in
__ovs_nla_copy_actions(). It will be made easier if you add a separate
MD attribute as Jiri has suggested.

> +		break;
> +	}
> +	default:
> +		return -ENOTSUPP;
> +	}
> +
> +	if (!skb->inner_protocol)
> +		skb_set_inner_protocol(skb, skb->protocol);
> +
> +	skb->protocol = htons(ETH_P_NSH);
> +	key->eth.type = htons(ETH_P_NSH);
> +	skb_reset_mac_header(skb);
> +	skb_reset_mac_len(skb);
> +
> +	/* safe right before invalidate_flow_key */
> +	key->mac_proto = MAC_PROTO_NONE;
> +	invalidate_flow_key(key);
> +	return 0;
> +}
> +
[..]

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH net-next v2] openvswitch: enable NSH support
  2017-08-14 16:09   ` Eric Garver
@ 2017-08-15  0:39     ` Yang, Yi
  0 siblings, 0 replies; 17+ messages in thread
From: Yang, Yi @ 2017-08-15  0:39 UTC (permalink / raw)
  To: Eric Garver
  Cc: dev-yBygre7rU0TnMu66kgdUjQ, netdev-u79uwXL29TY76Z2rM5mHXA,
	jbenc-H+wXaHxf7aLQT0dZR+AlfA

On Tue, Aug 15, 2017 at 12:09:14AM +0800, Eric Garver wrote:
> On Thu, Aug 10, 2017 at 09:21:15PM +0800, Yi Yang wrote:
> 
> Hi Yi,
> 
> In general I'd like to echo Jiri's comments on the netlink attributes.
> I'd like to see the metadata separate.
> 
> I have a few other comments below.
> 
> Thanks.
> Eric.
> 
> [..]

Thanks Eric, I'm doing this and it is almost done, there is still an
issue to fix.

> > +{
> > +	return 4 * (ntohs(nsh->ver_flags_len) & NSH_LEN_MASK) >> NSH_LEN_SHIFT;
> 
> This is doing the multiplication before the shift. It works only because
> the shift is 0.
> 

Thank you for catching this, the right one:

((ntohs(nsh->ver_flags_len) & NSH_LEN_MASK) >> NSH_LEN_SHIFT) << 2

> > +static int push_nsh(struct sk_buff *skb, struct sw_flow_key *key,
> > +		    const struct ovs_action_push_nsh *oapn)
> > +{
> > +	struct nsh_hdr *nsh;
> > +	size_t length = NSH_BASE_HDR_LEN + oapn->mdlen;
> > +	u8 next_proto;
> > +
> > +	if (key->mac_proto == MAC_PROTO_ETHERNET) {
> > +		next_proto = NSH_P_ETHERNET;
> > +	} else {
> > +		switch (ntohs(skb->protocol)) {
> > +		case ETH_P_IP:
> > +			next_proto = NSH_P_IPV4;
> > +			break;
> > +		case ETH_P_IPV6:
> > +			next_proto = NSH_P_IPV6;
> > +			break;
> > +		case ETH_P_NSH:
> > +			next_proto = NSH_P_NSH;
> > +			break;
> > +		default:
> > +			return -ENOTSUPP;
> > +		}
> > +	}
> > +
> >
> 
> I believe you need to validate that oapn->mdlen is a multiple of 4.
>

I'll add this check.

> > +	switch (nsh->md_type) {
> > +	case NSH_M_TYPE1:
> > +		nsh->md1 = *(struct nsh_md1_ctx *)oapn->metadata;
> > +		break;
> > +	case NSH_M_TYPE2: {
> > +		/* The MD2 metadata in oapn is already padded to 4 bytes. */
> > +		size_t len = DIV_ROUND_UP(oapn->mdlen, 4) * 4;
> > +
> > +		memcpy(nsh->md2, oapn->metadata, len);
> 
> I don't see any validation of oapn->mdlen. Normally this happens in
> __ovs_nla_copy_actions(). It will be made easier if you add a separate
> MD attribute as Jiri has suggested.
>

Got it, will include this in next version.

^ permalink raw reply	[flat|nested] 17+ messages in thread

end of thread, other threads:[~2017-08-15  0:39 UTC | newest]

Thread overview: 17+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-08-10 13:21 [PATCH net-next v2] openvswitch: enable NSH support Yi Yang
     [not found] ` <1502371275-52446-1-git-send-email-yi.y.yang-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
2017-08-11  8:24   ` Jiri Benc
2017-08-11  8:47     ` Yang, Yi
2017-08-11  9:10       ` Jiri Benc
2017-08-11  9:24         ` Yang, Yi Y
2017-08-11  9:44           ` Jiri Benc
2017-08-11  9:54             ` Yang, Yi
2017-08-11 10:09             ` Jan Scheurich
     [not found]               ` <CFF8EF42F1132E4CBE2BF0AB6C21C58D7273679A-hqolJogE5njKJFWPz4pdheaU1rCVNFv4@public.gmane.org>
2017-08-11 10:22                 ` Jiri Benc
2017-08-13 21:13                   ` Jan Scheurich
     [not found]                     ` <CFF8EF42F1132E4CBE2BF0AB6C21C58D72736EAE-hqolJogE5njKJFWPz4pdheaU1rCVNFv4@public.gmane.org>
2017-08-14  7:51                       ` Jiri Benc
2017-08-14 10:35                         ` Jan Scheurich
     [not found]                           ` <CFF8EF42F1132E4CBE2BF0AB6C21C58D727373EA-hqolJogE5njKJFWPz4pdheaU1rCVNFv4@public.gmane.org>
2017-08-14 10:47                             ` Jiri Benc
2017-08-14 11:08                               ` Jan Scheurich
2017-08-11 10:35               ` Jiri Pirko
2017-08-14 16:09   ` Eric Garver
2017-08-15  0:39     ` Yang, Yi

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.