All of lore.kernel.org
 help / color / mirror / Atom feed
From: Andy Zhou <azhou@nicira.com>
To: davem@davemloft.net
Cc: netdev@vger.kernel.org, Jesse Gross <jesse@nicira.com>,
	Andy Zhou <azhou@nicira.com>
Subject: [net-next 06/10] openvswitch: Eliminate memset() from flow_extract.
Date: Tue, 22 Jul 2014 03:19:49 -0700	[thread overview]
Message-ID: <1406024393-6778-7-git-send-email-azhou@nicira.com> (raw)
In-Reply-To: <1406024393-6778-1-git-send-email-azhou@nicira.com>

From: Jesse Gross <jesse@nicira.com>

As new protocols are added, the size of the flow key tends to
increase although few protocols care about all of the fields. In
order to optimize this for hashing and matching, OVS uses a variable
length portion of the key. However, when fields are extracted from
the packet we must still zero out the entire key.

This is no longer necessary now that OVS implements masking. Any
fields (or holes in the structure) which are not part of a given
protocol will be by definition not part of the mask and zeroed out
during lookup. Furthermore, since masking already uses variable
length keys this zeroing operation automatically benefits as well.

In principle, the only thing that needs to be done at this point
is remove the memset() at the beginning of flow. However, some
fields assume that they are initialized to zero, which now must be
done explicitly. In addition, in the event of an error we must also
zero out corresponding fields to signal that there is no valid data
present. These increase the total amount of code but very little of
it is executed in non-error situations.

Removing the memset() reduces the profile of ovs_flow_extract()
from 0.64% to 0.56% when tested with large packets on a 10G link.

Suggested-by: Pravin Shelar <pshelar@nicira.com>
Signed-off-by: Jesse Gross <jesse@nicira.com>
Signed-off-by: Andy Zhou <azhou@nicira.com>
---
 net/openvswitch/flow.c |   47 ++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 42 insertions(+), 5 deletions(-)

diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index d07ab53..7691b11 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -272,6 +272,8 @@ static int parse_ipv6hdr(struct sk_buff *skb, struct sw_flow_key *key)
 			key->ip.frag = OVS_FRAG_TYPE_LATER;
 		else
 			key->ip.frag = OVS_FRAG_TYPE_FIRST;
+	} else {
+		key->ip.frag = OVS_FRAG_TYPE_NONE;
 	}
 
 	nh_len = payload_ofs - nh_ofs;
@@ -356,6 +358,7 @@ static int parse_icmpv6(struct sk_buff *skb, struct sw_flow_key *key,
 	 */
 	key->tp.src = htons(icmp->icmp6_type);
 	key->tp.dst = htons(icmp->icmp6_code);
+	memset(&key->ipv6.nd, 0, sizeof(key->ipv6.nd));
 
 	if (icmp->icmp6_code == 0 &&
 	    (icmp->icmp6_type == NDISC_NEIGHBOUR_SOLICITATION ||
@@ -447,14 +450,18 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key)
 	int error;
 	struct ethhdr *eth;
 
-	memset(key, 0, sizeof(*key));
-
 	key->phy.priority = skb->priority;
 	if (OVS_CB(skb)->tun_key)
 		memcpy(&key->tun_key, OVS_CB(skb)->tun_key, sizeof(key->tun_key));
+	else
+		memset(&key->tun_key, 0, sizeof(key->tun_key));
+
 	key->phy.in_port = in_port;
 	key->phy.skb_mark = skb->mark;
 
+	/* Flags are always used as part of stats. */
+	key->tp.flags = 0;
+
 	skb_reset_mac_header(skb);
 
 	/* Link layer.  We are guaranteed to have at least the 14 byte Ethernet
@@ -469,6 +476,7 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key)
 	 * update skb->csum here.
 	 */
 
+	key->eth.tci = 0;
 	if (vlan_tx_tag_present(skb))
 		key->eth.tci = htons(skb->vlan_tci);
 	else if (eth->h_proto == htons(ETH_P_8021Q))
@@ -489,6 +497,8 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key)
 
 		error = check_iphdr(skb);
 		if (unlikely(error)) {
+			memset(&key->ip, 0, sizeof(key->ip));
+			memset(&key->ipv4, 0, sizeof(key->ipv4));
 			if (error == -EINVAL) {
 				skb->transport_header = skb->network_header;
 				error = 0;
@@ -512,6 +522,8 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key)
 		if (nh->frag_off & htons(IP_MF) ||
 			 skb_shinfo(skb)->gso_type & SKB_GSO_UDP)
 			key->ip.frag = OVS_FRAG_TYPE_FIRST;
+		else
+			key->ip.frag = OVS_FRAG_TYPE_NONE;
 
 		/* Transport layer. */
 		if (key->ip.proto == IPPROTO_TCP) {
@@ -520,18 +532,24 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key)
 				key->tp.src = tcp->source;
 				key->tp.dst = tcp->dest;
 				key->tp.flags = TCP_FLAGS_BE16(tcp);
+			} else {
+				memset(&key->tp, 0, sizeof(key->tp));
 			}
 		} else if (key->ip.proto == IPPROTO_UDP) {
 			if (udphdr_ok(skb)) {
 				struct udphdr *udp = udp_hdr(skb);
 				key->tp.src = udp->source;
 				key->tp.dst = udp->dest;
+			} else {
+				memset(&key->tp, 0, sizeof(key->tp));
 			}
 		} else if (key->ip.proto == IPPROTO_SCTP) {
 			if (sctphdr_ok(skb)) {
 				struct sctphdr *sctp = sctp_hdr(skb);
 				key->tp.src = sctp->source;
 				key->tp.dst = sctp->dest;
+			} else {
+				memset(&key->tp, 0, sizeof(key->tp));
 			}
 		} else if (key->ip.proto == IPPROTO_ICMP) {
 			if (icmphdr_ok(skb)) {
@@ -541,16 +559,19 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key)
 				 * them in 16-bit network byte order. */
 				key->tp.src = htons(icmp->type);
 				key->tp.dst = htons(icmp->code);
+			} else {
+				memset(&key->tp, 0, sizeof(key->tp));
 			}
 		}
 
-	} else if ((key->eth.type == htons(ETH_P_ARP) ||
-		   key->eth.type == htons(ETH_P_RARP)) && arphdr_ok(skb)) {
+	} else if (key->eth.type == htons(ETH_P_ARP) ||
+		   key->eth.type == htons(ETH_P_RARP)) {
 		struct arp_eth_header *arp;
 
 		arp = (struct arp_eth_header *)skb_network_header(skb);
 
-		if (arp->ar_hrd == htons(ARPHRD_ETHER)
+		if (arphdr_ok(skb)
+				&& arp->ar_hrd == htons(ARPHRD_ETHER)
 				&& arp->ar_pro == htons(ETH_P_IP)
 				&& arp->ar_hln == ETH_ALEN
 				&& arp->ar_pln == 4) {
@@ -558,16 +579,24 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key)
 			/* We only match on the lower 8 bits of the opcode. */
 			if (ntohs(arp->ar_op) <= 0xff)
 				key->ip.proto = ntohs(arp->ar_op);
+			else
+				key->ip.proto = 0;
+
 			memcpy(&key->ipv4.addr.src, arp->ar_sip, sizeof(key->ipv4.addr.src));
 			memcpy(&key->ipv4.addr.dst, arp->ar_tip, sizeof(key->ipv4.addr.dst));
 			ether_addr_copy(key->ipv4.arp.sha, arp->ar_sha);
 			ether_addr_copy(key->ipv4.arp.tha, arp->ar_tha);
+		} else {
+			memset(&key->ip, 0, sizeof(key->ip));
+			memset(&key->ipv4, 0, sizeof(key->ipv4));
 		}
 	} else if (key->eth.type == htons(ETH_P_IPV6)) {
 		int nh_len;             /* IPv6 Header + Extensions */
 
 		nh_len = parse_ipv6hdr(skb, key);
 		if (unlikely(nh_len < 0)) {
+			memset(&key->ip, 0, sizeof(key->ip));
+			memset(&key->ipv6.addr, 0, sizeof(key->ipv6.addr));
 			if (nh_len == -EINVAL) {
 				skb->transport_header = skb->network_header;
 				error = 0;
@@ -589,24 +618,32 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key)
 				key->tp.src = tcp->source;
 				key->tp.dst = tcp->dest;
 				key->tp.flags = TCP_FLAGS_BE16(tcp);
+			} else {
+				memset(&key->tp, 0, sizeof(key->tp));
 			}
 		} else if (key->ip.proto == NEXTHDR_UDP) {
 			if (udphdr_ok(skb)) {
 				struct udphdr *udp = udp_hdr(skb);
 				key->tp.src = udp->source;
 				key->tp.dst = udp->dest;
+			} else {
+				memset(&key->tp, 0, sizeof(key->tp));
 			}
 		} else if (key->ip.proto == NEXTHDR_SCTP) {
 			if (sctphdr_ok(skb)) {
 				struct sctphdr *sctp = sctp_hdr(skb);
 				key->tp.src = sctp->source;
 				key->tp.dst = sctp->dest;
+			} else {
+				memset(&key->tp, 0, sizeof(key->tp));
 			}
 		} else if (key->ip.proto == NEXTHDR_ICMP) {
 			if (icmp6hdr_ok(skb)) {
 				error = parse_icmpv6(skb, key, nh_len);
 				if (error)
 					return error;
+			} else {
+				memset(&key->tp, 0, sizeof(key->tp));
 			}
 		}
 	}
-- 
1.7.9.5

  parent reply	other threads:[~2014-07-22 10:32 UTC|newest]

Thread overview: 46+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-07-22 10:19 [net-next 00/10] Add Geneve Andy Zhou
2014-07-22 10:19 ` [net-next 01/10] net: Rename ndo_add_vxlan_port to ndo_add_udp_tunnel_port Andy Zhou
2014-07-22 10:49   ` Varka Bhadram
2014-07-24  6:40   ` Or Gerlitz
2014-07-24 20:28     ` Andy Zhou
2014-07-22 10:19 ` [net-next 02/10] udp: Expand UDP tunnel common APIs Andy Zhou
     [not found]   ` <CA+mtBx9M_BpjT-_Egng+jFxmqJzdC2Npg0ufE2ZSAb9Lhw8hxg@mail.gmail.com>
2014-07-22 21:02     ` Andy Zhou
2014-07-22 21:16       ` Tom Herbert
2014-07-22 21:56         ` Jesse Gross
2014-07-22 22:38           ` Tom Herbert
2014-07-22 22:55             ` Alexander Duyck
2014-07-22 23:24               ` Tom Herbert
2014-07-23  2:16                 ` Alexander Duyck
2014-07-23  3:53                   ` Tom Herbert
2014-07-23  4:35                     ` Jesse Gross
2014-07-23 15:45                       ` Tom Herbert
2014-07-24  3:24                         ` Jesse Gross
2014-07-22 23:12             ` Jesse Gross
2014-07-23 19:57   ` Tom Herbert
2014-07-24 20:23     ` Andy Zhou
2014-07-24 20:47       ` Tom Herbert
2014-07-24 20:54         ` Andy Zhou
2014-07-22 10:19 ` [net-next 03/10] vxlan: Remove vxlan_get_rx_port() Andy Zhou
     [not found]   ` <CAKgT0UeRSc3MaZrLmXyx4jPZO+F1hS5imR1TjFkvKp4S8nQmeg@mail.gmail.com>
2014-07-23  3:57     ` Andy Zhou
2014-07-22 10:19 ` [net-next 04/10] net: Refactor vxlan driver to make use of common UDP tunnel functions Andy Zhou
2014-07-24  6:46   ` Or Gerlitz
2014-07-22 10:19 ` [net-next 05/10] net: Add Geneve tunneling protocol driver Andy Zhou
2014-07-22 23:12   ` Alexander Duyck
2014-07-22 23:24     ` Jesse Gross
2014-07-23 14:11       ` John W. Linville
2014-07-23 18:20   ` Stephen Hemminger
2014-07-22 10:19 ` Andy Zhou [this message]
2014-07-22 10:19 ` [net-next 07/10] openvswitch: Add support for matching on OAM packets Andy Zhou
2014-07-22 10:19 ` [net-next 08/10] openvswitch: Wrap struct ovs_key_ipv4_tunnel in a new structure Andy Zhou
2014-07-22 10:19 ` [net-next 09/10] openvswitch: Factor out allocation and verification of actions Andy Zhou
2014-07-22 10:19 ` [net-next 10/10] openvswitch: Add support for Geneve tunneling Andy Zhou
2014-07-23 20:29   ` Tom Herbert
2014-07-24  4:10     ` Jesse Gross
     [not found]       ` <CA+mtBx9umxiFYtnG1kzFkK+Ev=b=4f3q2OOow2QcfCB5rUTUyA@mail.gmail.com>
2014-07-24 22:59         ` Jesse Gross
2014-07-24 23:45           ` Tom Herbert
2014-07-25  1:04             ` Jesse Gross
2014-07-22 10:54 ` [net-next 00/10] Add Geneve Varka Bhadram
2014-07-24  6:58 ` Or Gerlitz
2014-07-24 17:40   ` Tom Herbert
2014-07-24 21:03     ` Andy Zhou
2014-07-24 22:03       ` Tom Herbert

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1406024393-6778-7-git-send-email-azhou@nicira.com \
    --to=azhou@nicira.com \
    --cc=davem@davemloft.net \
    --cc=jesse@nicira.com \
    --cc=netdev@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.