All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH net-next 0/9] support unique MAC addresses for slave devices
@ 2017-04-27 14:51 Marco Chiappero
  2017-04-27 14:51 ` [PATCH net-next 1/9] ipvlan: fix coding style for the ipvlan tree Marco Chiappero
                   ` (9 more replies)
  0 siblings, 10 replies; 27+ messages in thread
From: Marco Chiappero @ 2017-04-27 14:51 UTC (permalink / raw)
  To: netdev
  Cc: David S . Miller, Jeff Kirsher, Alexander Duyck, Sainath Grandhi,
	Mahesh Bandewar, Marco Chiappero

Currently every slave device gets assigned the same MAC address, by
having it copied from the master interface. Since some code paths
depend on this identity, changing the MAC address on slave interfaces
is not supported. However identical MAC addresses can pose problems to
management and orchestration software that correctly expect network
interfaces on the same segment to have unique addresses.

Patches 1-8 include style fixes and refactoring (patch 9 depends upon)
that improve the overal quality and make the intruduction of the
feature straightforward.

Patch 9 enables slave devices to own unique MAC addresses and change
such addresses live, fixing lack of support and a related bug, as
MAC address changes on master were not propagated to slave devices.
In order to preserve the main peculiarity of this driver, that is
exposing only a single MAC address for outbound traffic, frames
egressing from master are now effectively masquerated when working in
L2 mode.

Marco Chiappero (9):
  ipvlan: fix coding style for the ipvlan tree
  ipvlan: refactor ipvlan_process_multicast for readability
  ipvlan: replace ipvlan_rcv_frame
  ipvlan: rework the IP lookup function
  ipvlan: improve and uniform naming
  ipvlan: reposition three functions
  ipvlan: relocate ipvlan_skb_crossing_ns calls
  ipvlan: improve compiler hints
  ipvlan: introduce individual MAC addresses

 drivers/net/ipvlan/ipvlan.h      |   2 +-
 drivers/net/ipvlan/ipvlan_core.c | 592 ++++++++++++++++++++-------------------
 drivers/net/ipvlan/ipvlan_main.c |  49 ++--
 drivers/net/ipvlan/ipvtap.c      |   1 +
 4 files changed, 333 insertions(+), 311 deletions(-)

-- 
2.9.3

--------------------------------------------------------------
Intel Research and Development Ireland Limited
Registered in Ireland
Registered Office: Collinstown Industrial Park, Leixlip, County Kildare
Registered Number: 308263


This e-mail and any attachments may contain confidential material for the sole
use of the intended recipient(s). Any review or distribution by others is
strictly prohibited. If you are not the intended recipient, please contact the
sender and delete all copies.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* [PATCH net-next 1/9] ipvlan: fix coding style for the ipvlan tree
  2017-04-27 14:51 [PATCH net-next 0/9] support unique MAC addresses for slave devices Marco Chiappero
@ 2017-04-27 14:51 ` Marco Chiappero
  2017-04-27 14:51 ` [PATCH net-next 2/9] ipvlan: refactor ipvlan_process_multicast for readability Marco Chiappero
                   ` (8 subsequent siblings)
  9 siblings, 0 replies; 27+ messages in thread
From: Marco Chiappero @ 2017-04-27 14:51 UTC (permalink / raw)
  To: netdev
  Cc: David S . Miller, Jeff Kirsher, Alexander Duyck, Sainath Grandhi,
	Mahesh Bandewar, Marco Chiappero

Update the ipvlan and ipvtap drivers to comply to the Linux coding
style, fixing errors and warnings reported by checkpatch.

Signed-off-by: Marco Chiappero <marco.chiappero@intel.com>
---
 drivers/net/ipvlan/ipvlan_core.c | 29 ++++++++++++++++-------------
 drivers/net/ipvlan/ipvlan_main.c | 31 ++++++++++++++-----------------
 drivers/net/ipvlan/ipvtap.c      |  1 +
 3 files changed, 31 insertions(+), 30 deletions(-)

diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c
index 1f3295e..1266f01 100644
--- a/drivers/net/ipvlan/ipvlan_core.c
+++ b/drivers/net/ipvlan/ipvlan_core.c
@@ -17,7 +17,7 @@ void ipvlan_init_secret(void)
 }
 
 void ipvlan_count_rx(const struct ipvl_dev *ipvlan,
-			    unsigned int len, bool success, bool mcast)
+		     unsigned int len, bool success, bool mcast)
 {
 	if (likely(success)) {
 		struct ipvl_pcpu_stats *pcptr;
@@ -95,9 +95,9 @@ struct ipvl_addr *ipvlan_find_addr(const struct ipvl_dev *ipvlan,
 
 	list_for_each_entry(addr, &ipvlan->addrs, anode) {
 		if ((is_v6 && addr->atype == IPVL_IPV6 &&
-		    ipv6_addr_equal(&addr->ip6addr, iaddr)) ||
+		     ipv6_addr_equal(&addr->ip6addr, iaddr)) ||
 		    (!is_v6 && addr->atype == IPVL_IPV4 &&
-		    addr->ip4addr.s_addr == ((struct in_addr *)iaddr)->s_addr))
+		     addr->ip4addr.s_addr == ((struct in_addr *)iaddr)->s_addr))
 			return addr;
 	}
 	return NULL;
@@ -179,7 +179,7 @@ static void *ipvlan_get_L3_hdr(struct sk_buff *skb, int *type)
 
 unsigned int ipvlan_mac_hash(const unsigned char *addr)
 {
-	u32 hash = jhash_1word(__get_unaligned_cpu32(addr+2),
+	u32 hash = jhash_1word(__get_unaligned_cpu32(addr + 2),
 			       ipvlan_jhash_secret);
 
 	return hash & IPVLAN_MAC_FILTER_MASK;
@@ -234,11 +234,13 @@ void ipvlan_process_multicast(struct work_struct *work)
 				nskb->pkt_type = pkt_type;
 				nskb->dev = ipvlan->dev;
 				if (tx_pkt)
-					ret = dev_forward_skb(ipvlan->dev, nskb);
+					ret = dev_forward_skb(ipvlan->dev,
+							      nskb);
 				else
 					ret = netif_rx(nskb);
 			}
-			ipvlan_count_rx(ipvlan, len, ret == NET_RX_SUCCESS, true);
+			ipvlan_count_rx(ipvlan, len, ret == NET_RX_SUCCESS,
+					true);
 			local_bh_enable();
 		}
 		rcu_read_unlock();
@@ -461,11 +463,11 @@ static int ipvlan_process_outbound(struct sk_buff *skb)
 		skb_reset_network_header(skb);
 	}
 
-	if (skb->protocol == htons(ETH_P_IPV6))
+	if (skb->protocol == htons(ETH_P_IPV6)) {
 		ret = ipvlan_process_v6_outbound(skb);
-	else if (skb->protocol == htons(ETH_P_IP))
+	} else if (skb->protocol == htons(ETH_P_IP)) {
 		ret = ipvlan_process_v4_outbound(skb);
-	else {
+	} else {
 		pr_warn_ratelimited("Dropped outbound packet type=%x\n",
 				    ntohs(skb->protocol));
 		kfree_skb(skb);
@@ -534,7 +536,8 @@ static int ipvlan_xmit_mode_l2(struct sk_buff *skb, struct net_device *dev)
 	if (ether_addr_equal(eth->h_dest, eth->h_source)) {
 		lyr3h = ipvlan_get_L3_hdr(skb, &addr_type);
 		if (lyr3h) {
-			addr = ipvlan_addr_lookup(ipvlan->port, lyr3h, addr_type, true);
+			addr = ipvlan_addr_lookup(ipvlan->port, lyr3h,
+						  addr_type, true);
 			if (addr)
 				return ipvlan_rcv_frame(addr, &skb, true);
 		}
@@ -570,7 +573,7 @@ int ipvlan_queue_xmit(struct sk_buff *skb, struct net_device *dev)
 	if (unlikely(!pskb_may_pull(skb, sizeof(struct ethhdr))))
 		goto out;
 
-	switch(port->mode) {
+	switch (port->mode) {
 	case IPVLAN_MODE_L2:
 		return ipvlan_xmit_mode_l2(skb, dev);
 	case IPVLAN_MODE_L3:
@@ -580,7 +583,7 @@ int ipvlan_queue_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	/* Should not reach here */
 	WARN_ONCE(true, "ipvlan_queue_xmit() called for mode = [%hx]\n",
-			  port->mode);
+		  port->mode);
 out:
 	kfree_skb(skb);
 	return NET_XMIT_DROP;
@@ -685,7 +688,7 @@ rx_handler_result_t ipvlan_handle_frame(struct sk_buff **pskb)
 
 	/* Should not reach here */
 	WARN_ONCE(true, "ipvlan_handle_frame() called for mode = [%hx]\n",
-			  port->mode);
+		  port->mode);
 	kfree_skb(skb);
 	return RX_HANDLER_CONSUMED;
 }
diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c
index aa8575c..b837807 100644
--- a/drivers/net/ipvlan/ipvlan_main.c
+++ b/drivers/net/ipvlan/ipvlan_main.c
@@ -9,7 +9,7 @@
 
 #include "ipvlan.h"
 
-static u32 ipvl_nf_hook_refcnt = 0;
+static u32 ipvl_nf_hook_refcnt;
 
 static struct nf_hook_ops ipvl_nfops[] __read_mostly = {
 	{
@@ -73,8 +73,9 @@ static int ipvlan_set_port_mode(struct ipvl_port *port, u16 nval)
 			if (!err) {
 				mdev->l3mdev_ops = &ipvl_l3mdev_ops;
 				mdev->priv_flags |= IFF_L3MDEV_MASTER;
-			} else
+			} else {
 				return err;
+			}
 		} else if (port->mode == IPVLAN_MODE_L3S) {
 			/* Old mode was L3S */
 			mdev->priv_flags &= ~IFF_L3MDEV_MASTER;
@@ -107,7 +108,7 @@ static int ipvlan_port_create(struct net_device *dev)
 		return -EBUSY;
 	}
 
-	port = kzalloc(sizeof(struct ipvl_port), GFP_KERNEL);
+	port = kzalloc(sizeof(*port), GFP_KERNEL);
 	if (!port)
 		return -ENOMEM;
 
@@ -163,7 +164,7 @@ static void ipvlan_port_destroy(struct net_device *dev)
 	 NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_HW_VLAN_STAG_FILTER)
 
 #define IPVLAN_STATE_MASK \
-	((1<<__LINK_STATE_NOCARRIER) | (1<<__LINK_STATE_DORMANT))
+	((1 << __LINK_STATE_NOCARRIER) | (1 << __LINK_STATE_DORMANT))
 
 static int ipvlan_init(struct net_device *dev)
 {
@@ -274,7 +275,7 @@ static void ipvlan_change_rx_flags(struct net_device *dev, int change)
 	struct net_device *phy_dev = ipvlan->phy_dev;
 
 	if (change & IFF_ALLMULTI)
-		dev_set_allmulti(phy_dev, dev->flags & IFF_ALLMULTI? 1 : -1);
+		dev_set_allmulti(phy_dev, (dev->flags & IFF_ALLMULTI) ? 1 : -1);
 }
 
 static void ipvlan_set_multicast_mac_filter(struct net_device *dev)
@@ -319,7 +320,7 @@ static void ipvlan_get_stats64(struct net_device *dev,
 		for_each_possible_cpu(idx) {
 			pcptr = per_cpu_ptr(ipvlan->pcpu_stats, idx);
 			do {
-				strt= u64_stats_fetch_begin_irq(&pcptr->syncp);
+				strt = u64_stats_fetch_begin_irq(&pcptr->syncp);
 				rx_pkts = pcptr->rx_pkts;
 				rx_bytes = pcptr->rx_bytes;
 				rx_mcast = pcptr->rx_mcast;
@@ -386,7 +387,7 @@ static const struct net_device_ops ipvlan_netdev_ops = {
 
 static int ipvlan_hard_header(struct sk_buff *skb, struct net_device *dev,
 			      unsigned short type, const void *daddr,
-			      const void *saddr, unsigned len)
+			      const void *saddr, unsigned int len)
 {
 	const struct ipvl_dev *ipvlan = netdev_priv(dev);
 	struct net_device *phy_dev = ipvlan->phy_dev;
@@ -400,7 +401,7 @@ static int ipvlan_hard_header(struct sk_buff *skb, struct net_device *dev,
 }
 
 static const struct header_ops ipvlan_header_ops = {
-	.create  	= ipvlan_hard_header,
+	.create		= ipvlan_hard_header,
 	.parse		= eth_header_parse,
 	.cache		= eth_header_cache,
 	.cache_update	= eth_header_cache_update,
@@ -571,13 +572,12 @@ int ipvlan_link_new(struct net *src_net, struct net_device *dev,
 		goto remove_ida;
 
 	err = netdev_upper_dev_link(phy_dev, dev);
-	if (err) {
+	if (err)
 		goto unregister_netdev;
-	}
+
 	err = ipvlan_set_port_mode(port, mode);
-	if (err) {
+	if (err)
 		goto unlink_netdev;
-	}
 
 	list_add_tail_rcu(&ipvlan->pnode, &port->ipvlans);
 	netif_stacked_transfer_operstate(phy_dev, dev);
@@ -627,8 +627,7 @@ void ipvlan_link_setup(struct net_device *dev)
 }
 EXPORT_SYMBOL_GPL(ipvlan_link_setup);
 
-static const struct nla_policy ipvlan_nl_policy[IFLA_IPVLAN_MAX + 1] =
-{
+static const struct nla_policy ipvlan_nl_policy[IFLA_IPVLAN_MAX + 1] = {
 	[IFLA_IPVLAN_MODE] = { .type = NLA_U16 },
 };
 
@@ -709,7 +708,7 @@ static int ipvlan_add_addr(struct ipvl_dev *ipvlan, void *iaddr, bool is_v6)
 {
 	struct ipvl_addr *addr;
 
-	addr = kzalloc(sizeof(struct ipvl_addr), GFP_ATOMIC);
+	addr = kzalloc(sizeof(*addr), GFP_ATOMIC);
 	if (!addr)
 		return -ENOMEM;
 
@@ -743,8 +742,6 @@ static void ipvlan_del_addr(struct ipvl_dev *ipvlan, void *iaddr, bool is_v6)
 	ipvlan_ht_addr_del(addr);
 	list_del(&addr->anode);
 	kfree_rcu(addr, rcu);
-
-	return;
 }
 
 static int ipvlan_add_addr6(struct ipvl_dev *ipvlan, struct in6_addr *ip6_addr)
diff --git a/drivers/net/ipvlan/ipvtap.c b/drivers/net/ipvlan/ipvtap.c
index 2b713b6..04b2d50 100644
--- a/drivers/net/ipvlan/ipvtap.c
+++ b/drivers/net/ipvlan/ipvtap.c
@@ -32,6 +32,7 @@ static struct cdev ipvtap_cdev;
 static const void *ipvtap_net_namespace(struct device *d)
 {
 	struct net_device *dev = to_net_dev(d->parent);
+
 	return dev_net(dev);
 }
 
-- 
2.9.3

--------------------------------------------------------------
Intel Research and Development Ireland Limited
Registered in Ireland
Registered Office: Collinstown Industrial Park, Leixlip, County Kildare
Registered Number: 308263


This e-mail and any attachments may contain confidential material for the sole
use of the intended recipient(s). Any review or distribution by others is
strictly prohibited. If you are not the intended recipient, please contact the
sender and delete all copies.

^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH net-next 2/9] ipvlan: refactor ipvlan_process_multicast for readability
  2017-04-27 14:51 [PATCH net-next 0/9] support unique MAC addresses for slave devices Marco Chiappero
  2017-04-27 14:51 ` [PATCH net-next 1/9] ipvlan: fix coding style for the ipvlan tree Marco Chiappero
@ 2017-04-27 14:51 ` Marco Chiappero
  2017-04-27 14:51 ` [PATCH net-next 3/9] ipvlan: replace ipvlan_rcv_frame Marco Chiappero
                   ` (7 subsequent siblings)
  9 siblings, 0 replies; 27+ messages in thread
From: Marco Chiappero @ 2017-04-27 14:51 UTC (permalink / raw)
  To: netdev
  Cc: David S . Miller, Jeff Kirsher, Alexander Duyck, Sainath Grandhi,
	Mahesh Bandewar, Marco Chiappero

The function ipvlan_process_multicast both dequeues and dispatches
packets from/to ipvlan slaves. Decouple these two steps by introducing
ipvlan_dispatch_multicast, in order to reduce indentation and improve
the overall readability.

Signed-off-by: Marco Chiappero <marco.chiappero@intel.com>
Tested-by: Marco Chiappero <marco.chiappero@intel.com>
---
 drivers/net/ipvlan/ipvlan_core.c | 107 +++++++++++++++++++++------------------
 1 file changed, 57 insertions(+), 50 deletions(-)

diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c
index 1266f01..fd40c25 100644
--- a/drivers/net/ipvlan/ipvlan_core.c
+++ b/drivers/net/ipvlan/ipvlan_core.c
@@ -185,18 +185,69 @@ unsigned int ipvlan_mac_hash(const unsigned char *addr)
 	return hash & IPVLAN_MAC_FILTER_MASK;
 }
 
+static void ipvlan_dispatch_multicast(struct ipvl_port *port,
+				      struct sk_buff *skb, u8 pkt_type,
+				      unsigned int mac_hash)
+{
+	struct ipvl_dev *ipvlan;
+	struct sk_buff *nskb;
+	struct net_device *dev = skb->dev;
+	bool tx_pkt = IPVL_SKB_CB(skb)->tx_pkt;
+	bool consumed = false;
+	unsigned int len;
+	int ret;
+
+	/* dispatch to slaves */
+	rcu_read_lock();
+	list_for_each_entry_rcu(ipvlan, &port->ipvlans, pnode) {
+		if (tx_pkt && (ipvlan->dev == skb->dev))
+			continue;
+		if (!test_bit(mac_hash, ipvlan->mac_filters))
+			continue;
+		if (!(ipvlan->dev->flags & IFF_UP))
+			continue;
+		ret = NET_RX_DROP;
+		len = skb->len + ETH_HLEN;
+		nskb = skb_clone(skb, GFP_ATOMIC);
+		local_bh_disable();
+		if (nskb) {
+			consumed = true;
+			nskb->pkt_type = pkt_type;
+			nskb->dev = ipvlan->dev;
+			if (tx_pkt)
+				ret = dev_forward_skb(ipvlan->dev, nskb);
+			else
+				ret = netif_rx(nskb);
+		}
+		ipvlan_count_rx(ipvlan, len, ret == NET_RX_SUCCESS, true);
+		local_bh_enable();
+	}
+	rcu_read_unlock();
+
+	if (tx_pkt) {
+		/* If the packet originated here, send it out. */
+		skb->dev = port->dev;
+		skb->pkt_type = pkt_type;
+		dev_queue_xmit(skb);
+	} else {
+		if (consumed)
+			consume_skb(skb);
+		else
+			kfree_skb(skb);
+	}
+
+	if (dev)
+		dev_put(dev);
+}
+
 void ipvlan_process_multicast(struct work_struct *work)
 {
 	struct ipvl_port *port = container_of(work, struct ipvl_port, wq);
 	struct ethhdr *ethh;
-	struct ipvl_dev *ipvlan;
-	struct sk_buff *skb, *nskb;
+	struct sk_buff *skb;
 	struct sk_buff_head list;
-	unsigned int len;
 	unsigned int mac_hash;
-	int ret;
 	u8 pkt_type;
-	bool tx_pkt;
 
 	__skb_queue_head_init(&list);
 
@@ -205,11 +256,7 @@ void ipvlan_process_multicast(struct work_struct *work)
 	spin_unlock_bh(&port->backlog.lock);
 
 	while ((skb = __skb_dequeue(&list)) != NULL) {
-		struct net_device *dev = skb->dev;
-		bool consumed = false;
-
 		ethh = eth_hdr(skb);
-		tx_pkt = IPVL_SKB_CB(skb)->tx_pkt;
 		mac_hash = ipvlan_mac_hash(ethh->h_dest);
 
 		if (ether_addr_equal(ethh->h_dest, port->dev->broadcast))
@@ -217,47 +264,7 @@ void ipvlan_process_multicast(struct work_struct *work)
 		else
 			pkt_type = PACKET_MULTICAST;
 
-		rcu_read_lock();
-		list_for_each_entry_rcu(ipvlan, &port->ipvlans, pnode) {
-			if (tx_pkt && (ipvlan->dev == skb->dev))
-				continue;
-			if (!test_bit(mac_hash, ipvlan->mac_filters))
-				continue;
-			if (!(ipvlan->dev->flags & IFF_UP))
-				continue;
-			ret = NET_RX_DROP;
-			len = skb->len + ETH_HLEN;
-			nskb = skb_clone(skb, GFP_ATOMIC);
-			local_bh_disable();
-			if (nskb) {
-				consumed = true;
-				nskb->pkt_type = pkt_type;
-				nskb->dev = ipvlan->dev;
-				if (tx_pkt)
-					ret = dev_forward_skb(ipvlan->dev,
-							      nskb);
-				else
-					ret = netif_rx(nskb);
-			}
-			ipvlan_count_rx(ipvlan, len, ret == NET_RX_SUCCESS,
-					true);
-			local_bh_enable();
-		}
-		rcu_read_unlock();
-
-		if (tx_pkt) {
-			/* If the packet originated here, send it out. */
-			skb->dev = port->dev;
-			skb->pkt_type = pkt_type;
-			dev_queue_xmit(skb);
-		} else {
-			if (consumed)
-				consume_skb(skb);
-			else
-				kfree_skb(skb);
-		}
-		if (dev)
-			dev_put(dev);
+		ipvlan_dispatch_multicast(port, skb, pkt_type, mac_hash);
 	}
 }
 
-- 
2.9.3

--------------------------------------------------------------
Intel Research and Development Ireland Limited
Registered in Ireland
Registered Office: Collinstown Industrial Park, Leixlip, County Kildare
Registered Number: 308263


This e-mail and any attachments may contain confidential material for the sole
use of the intended recipient(s). Any review or distribution by others is
strictly prohibited. If you are not the intended recipient, please contact the
sender and delete all copies.

^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH net-next 3/9] ipvlan: replace ipvlan_rcv_frame
  2017-04-27 14:51 [PATCH net-next 0/9] support unique MAC addresses for slave devices Marco Chiappero
  2017-04-27 14:51 ` [PATCH net-next 1/9] ipvlan: fix coding style for the ipvlan tree Marco Chiappero
  2017-04-27 14:51 ` [PATCH net-next 2/9] ipvlan: refactor ipvlan_process_multicast for readability Marco Chiappero
@ 2017-04-27 14:51 ` Marco Chiappero
  2017-04-27 14:51 ` [PATCH net-next 4/9] ipvlan: rework the IP lookup function Marco Chiappero
                   ` (6 subsequent siblings)
  9 siblings, 0 replies; 27+ messages in thread
From: Marco Chiappero @ 2017-04-27 14:51 UTC (permalink / raw)
  To: netdev
  Cc: David S . Miller, Jeff Kirsher, Alexander Duyck, Sainath Grandhi,
	Mahesh Bandewar, Marco Chiappero

So far the ipvlan_rcv_frame function handled the reception for slaves of
both externally and internally (from the same port) originated packets.
However they need significantly different processing, with almost no
code to share.

This patch modifies ipvlan_rcv_frame to deal with internal packets only,
renaming it to ipvlan_rcv_int_frame, and introduces a new
ipvlan_rcv_ext_frame function for externally originated ones.

Signed-off-by: Marco Chiappero <marco.chiappero@intel.com>
Tested-by: Marco Chiappero <marco.chiappero@intel.com>
---
 drivers/net/ipvlan/ipvlan_core.c | 58 +++++++++++++++++++++-------------------
 1 file changed, 30 insertions(+), 28 deletions(-)

diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c
index fd40c25..4683bad 100644
--- a/drivers/net/ipvlan/ipvlan_core.c
+++ b/drivers/net/ipvlan/ipvlan_core.c
@@ -280,46 +280,36 @@ static void ipvlan_skb_crossing_ns(struct sk_buff *skb, struct net_device *dev)
 		skb->dev = dev;
 }
 
-static int ipvlan_rcv_frame(struct ipvl_addr *addr, struct sk_buff **pskb,
-			    bool local)
+static int ipvlan_rcv_int_frame(struct ipvl_addr *addr, struct sk_buff **pskb)
 {
 	struct ipvl_dev *ipvlan = addr->master;
 	struct net_device *dev = ipvlan->dev;
-	unsigned int len;
-	rx_handler_result_t ret = RX_HANDLER_CONSUMED;
-	bool success = false;
 	struct sk_buff *skb = *pskb;
+	unsigned int len = skb->len + ETH_HLEN;
+	bool success = false;
 
-	len = skb->len + ETH_HLEN;
 	/* Only packets exchanged between two local slaves need to have
 	 * device-up check as well as skb-share check.
 	 */
-	if (local) {
-		if (unlikely(!(dev->flags & IFF_UP))) {
-			kfree_skb(skb);
-			goto out;
-		}
+	if (unlikely(!(dev->flags & IFF_UP))) {
+		kfree_skb(skb);
+		goto out;
+	}
 
-		skb = skb_share_check(skb, GFP_ATOMIC);
-		if (!skb)
-			goto out;
+	skb = skb_share_check(skb, GFP_ATOMIC);
+	if (unlikely(!skb))
+		goto out;
 
-		*pskb = skb;
-	}
+	*pskb = skb;
 	ipvlan_skb_crossing_ns(skb, dev);
+	skb->pkt_type = PACKET_HOST;
 
-	if (local) {
-		skb->pkt_type = PACKET_HOST;
-		if (dev_forward_skb(ipvlan->dev, skb) == NET_RX_SUCCESS)
-			success = true;
-	} else {
-		ret = RX_HANDLER_ANOTHER;
+	if (dev_forward_skb(ipvlan->dev, skb) == NET_RX_SUCCESS)
 		success = true;
-	}
 
 out:
 	ipvlan_count_rx(ipvlan, len, success, false);
-	return ret;
+	return RX_HANDLER_CONSUMED;
 }
 
 static struct ipvl_addr *ipvlan_addr_lookup(struct ipvl_port *port,
@@ -525,7 +515,7 @@ static int ipvlan_xmit_mode_l3(struct sk_buff *skb, struct net_device *dev)
 
 	addr = ipvlan_addr_lookup(ipvlan->port, lyr3h, addr_type, true);
 	if (addr)
-		return ipvlan_rcv_frame(addr, &skb, true);
+		return ipvlan_rcv_int_frame(addr, &skb);
 
 out:
 	ipvlan_skb_crossing_ns(skb, ipvlan->phy_dev);
@@ -546,7 +536,7 @@ static int ipvlan_xmit_mode_l2(struct sk_buff *skb, struct net_device *dev)
 			addr = ipvlan_addr_lookup(ipvlan->port, lyr3h,
 						  addr_type, true);
 			if (addr)
-				return ipvlan_rcv_frame(addr, &skb, true);
+				return ipvlan_rcv_int_frame(addr, &skb);
 		}
 		skb = skb_share_check(skb, GFP_ATOMIC);
 		if (!skb)
@@ -616,6 +606,18 @@ static bool ipvlan_external_frame(struct sk_buff *skb, struct ipvl_port *port)
 	return true;
 }
 
+static int ipvlan_rcv_ext_frame(struct ipvl_addr *addr, struct sk_buff *skb)
+{
+	struct ipvl_dev *ipvlan = addr->master;
+	struct net_device *dev = ipvlan->dev;
+	unsigned int len = skb->len + ETH_HLEN;
+
+	ipvlan_skb_crossing_ns(skb, dev);
+	ipvlan_count_rx(ipvlan, len, true, false);
+
+	return RX_HANDLER_ANOTHER;
+}
+
 static rx_handler_result_t ipvlan_handle_mode_l3(struct sk_buff **pskb,
 						 struct ipvl_port *port)
 {
@@ -631,7 +633,7 @@ static rx_handler_result_t ipvlan_handle_mode_l3(struct sk_buff **pskb,
 
 	addr = ipvlan_addr_lookup(port, lyr3h, addr_type, true);
 	if (addr)
-		ret = ipvlan_rcv_frame(addr, pskb, false);
+		ret = ipvlan_rcv_ext_frame(addr, skb);
 
 out:
 	return ret;
@@ -670,7 +672,7 @@ static rx_handler_result_t ipvlan_handle_mode_l2(struct sk_buff **pskb,
 
 		addr = ipvlan_addr_lookup(port, lyr3h, addr_type, true);
 		if (addr)
-			ret = ipvlan_rcv_frame(addr, pskb, false);
+			ret = ipvlan_rcv_ext_frame(addr, skb);
 	}
 
 	return ret;
-- 
2.9.3

--------------------------------------------------------------
Intel Research and Development Ireland Limited
Registered in Ireland
Registered Office: Collinstown Industrial Park, Leixlip, County Kildare
Registered Number: 308263


This e-mail and any attachments may contain confidential material for the sole
use of the intended recipient(s). Any review or distribution by others is
strictly prohibited. If you are not the intended recipient, please contact the
sender and delete all copies.

^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH net-next 4/9] ipvlan: rework the IP lookup function
  2017-04-27 14:51 [PATCH net-next 0/9] support unique MAC addresses for slave devices Marco Chiappero
                   ` (2 preceding siblings ...)
  2017-04-27 14:51 ` [PATCH net-next 3/9] ipvlan: replace ipvlan_rcv_frame Marco Chiappero
@ 2017-04-27 14:51 ` Marco Chiappero
  2017-04-27 14:51 ` [PATCH net-next 5/9] ipvlan: improve and uniform naming Marco Chiappero
                   ` (5 subsequent siblings)
  9 siblings, 0 replies; 27+ messages in thread
From: Marco Chiappero @ 2017-04-27 14:51 UTC (permalink / raw)
  To: netdev
  Cc: David S . Miller, Jeff Kirsher, Alexander Duyck, Sainath Grandhi,
	Mahesh Bandewar, Marco Chiappero

Functions ipvlan_get_L3_hdr and ipvlan_addr_lookup are used to determine
whether a packet belongs to a slave by looking at L3 addresses. Being
tightly coupled, these two functions are always used in pair and have
some duplicated code that could be shared.

This patch combines them into a single ipvlan_get_slave_addr function
and refactor caller functions, streamlining code and improving
readability.

A ipvlan_get_slave_addr_dst utility function is also provided as most
lookups are based on the destination IP address.

Signed-off-by: Marco Chiappero <marco.chiappero@intel.com>
Tested-by: Marco Chiappero <marco.chiappero@intel.com>
---
 drivers/net/ipvlan/ipvlan_core.c | 187 +++++++++++----------------------------
 1 file changed, 52 insertions(+), 135 deletions(-)

diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c
index 4683bad..876ce37 100644
--- a/drivers/net/ipvlan/ipvlan_core.c
+++ b/drivers/net/ipvlan/ipvlan_core.c
@@ -116,25 +116,34 @@ bool ipvlan_addr_busy(struct ipvl_port *port, void *iaddr, bool is_v6)
 	return false;
 }
 
-static void *ipvlan_get_L3_hdr(struct sk_buff *skb, int *type)
+static struct ipvl_addr *ipvlan_get_slave_addr(struct sk_buff *skb,
+					       struct ipvl_port *port,
+					       bool use_dest)
 {
-	void *lyr3h = NULL;
+	struct ipvl_addr *addr = NULL;
 
 	switch (skb->protocol) {
 	case htons(ETH_P_ARP): {
-		struct arphdr *arph;
+		u8 *arp_ptr;
+		__be32 dip;
 
-		if (unlikely(!pskb_may_pull(skb, sizeof(*arph))))
+		if (unlikely(!pskb_may_pull(skb, sizeof(struct arphdr))))
 			return NULL;
 
-		arph = arp_hdr(skb);
-		*type = IPVL_ARP;
-		lyr3h = arph;
+		arp_ptr = (u8 *)(arp_hdr(skb) + 1);
+		if (use_dest)
+			arp_ptr += (2 * port->dev->addr_len) + 4;
+		else
+			arp_ptr += port->dev->addr_len;
+
+		memcpy(&dip, arp_ptr, 4);
+		addr = ipvlan_ht_addr_lookup(port, &dip, false);
 		break;
 	}
 	case htons(ETH_P_IP): {
 		u32 pktlen;
 		struct iphdr *ip4h;
+		__be32 *i4addr;
 
 		if (unlikely(!pskb_may_pull(skb, sizeof(*ip4h))))
 			return NULL;
@@ -146,12 +155,14 @@ static void *ipvlan_get_L3_hdr(struct sk_buff *skb, int *type)
 		if (skb->len < pktlen || pktlen < (ip4h->ihl * 4))
 			return NULL;
 
-		*type = IPVL_IPV4;
-		lyr3h = ip4h;
+		i4addr = use_dest ? &ip4h->daddr : &ip4h->saddr;
+		addr = ipvlan_ht_addr_lookup(port, i4addr, false);
 		break;
 	}
 	case htons(ETH_P_IPV6): {
 		struct ipv6hdr *ip6h;
+		struct nd_msg *ndmh;
+		struct in6_addr *i6addr;
 
 		if (unlikely(!pskb_may_pull(skb, sizeof(*ip6h))))
 			return NULL;
@@ -160,21 +171,28 @@ static void *ipvlan_get_L3_hdr(struct sk_buff *skb, int *type)
 		if (ip6h->version != 6)
 			return NULL;
 
-		*type = IPVL_IPV6;
-		lyr3h = ip6h;
-		/* Only Neighbour Solicitation pkts need different treatment */
+		ndmh = (struct nd_msg *)(ip6h + 1);
 		if (ipv6_addr_any(&ip6h->saddr) &&
-		    ip6h->nexthdr == NEXTHDR_ICMP) {
-			*type = IPVL_ICMPV6;
-			lyr3h = ip6h + 1;
-		}
+		    ip6h->nexthdr == NEXTHDR_ICMP &&
+		    ndmh->icmph.icmp6_type == NDISC_NEIGHBOUR_SOLICITATION)
+			i6addr = &ndmh->target;
+		else
+			i6addr = use_dest ? &ip6h->daddr : &ip6h->saddr;
+
+		addr = ipvlan_ht_addr_lookup(port, i6addr, true);
 		break;
 	}
 	default:
 		return NULL;
 	}
 
-	return lyr3h;
+	return addr;
+}
+
+static inline struct ipvl_addr *ipvlan_get_slave_addr_dst(struct sk_buff *skb,
+							 struct ipvl_port *port)
+{
+	return ipvlan_get_slave_addr(skb, port, true);
 }
 
 unsigned int ipvlan_mac_hash(const unsigned char *addr)
@@ -312,57 +330,6 @@ static int ipvlan_rcv_int_frame(struct ipvl_addr *addr, struct sk_buff **pskb)
 	return RX_HANDLER_CONSUMED;
 }
 
-static struct ipvl_addr *ipvlan_addr_lookup(struct ipvl_port *port,
-					    void *lyr3h, int addr_type,
-					    bool use_dest)
-{
-	struct ipvl_addr *addr = NULL;
-
-	if (addr_type == IPVL_IPV6) {
-		struct ipv6hdr *ip6h;
-		struct in6_addr *i6addr;
-
-		ip6h = (struct ipv6hdr *)lyr3h;
-		i6addr = use_dest ? &ip6h->daddr : &ip6h->saddr;
-		addr = ipvlan_ht_addr_lookup(port, i6addr, true);
-	} else if (addr_type == IPVL_ICMPV6) {
-		struct nd_msg *ndmh;
-		struct in6_addr *i6addr;
-
-		/* Make sure that the NeighborSolicitation ICMPv6 packets
-		 * are handled to avoid DAD issue.
-		 */
-		ndmh = (struct nd_msg *)lyr3h;
-		if (ndmh->icmph.icmp6_type == NDISC_NEIGHBOUR_SOLICITATION) {
-			i6addr = &ndmh->target;
-			addr = ipvlan_ht_addr_lookup(port, i6addr, true);
-		}
-	} else if (addr_type == IPVL_IPV4) {
-		struct iphdr *ip4h;
-		__be32 *i4addr;
-
-		ip4h = (struct iphdr *)lyr3h;
-		i4addr = use_dest ? &ip4h->daddr : &ip4h->saddr;
-		addr = ipvlan_ht_addr_lookup(port, i4addr, false);
-	} else if (addr_type == IPVL_ARP) {
-		struct arphdr *arph;
-		unsigned char *arp_ptr;
-		__be32 dip;
-
-		arph = (struct arphdr *)lyr3h;
-		arp_ptr = (unsigned char *)(arph + 1);
-		if (use_dest)
-			arp_ptr += (2 * port->dev->addr_len) + 4;
-		else
-			arp_ptr += port->dev->addr_len;
-
-		memcpy(&dip, arp_ptr, 4);
-		addr = ipvlan_ht_addr_lookup(port, &dip, false);
-	}
-
-	return addr;
-}
-
 static int ipvlan_process_v4_outbound(struct sk_buff *skb)
 {
 	const struct iphdr *ip4h = ip_hdr(skb);
@@ -505,19 +472,12 @@ static void ipvlan_multicast_enqueue(struct ipvl_port *port,
 static int ipvlan_xmit_mode_l3(struct sk_buff *skb, struct net_device *dev)
 {
 	const struct ipvl_dev *ipvlan = netdev_priv(dev);
-	void *lyr3h;
 	struct ipvl_addr *addr;
-	int addr_type;
 
-	lyr3h = ipvlan_get_L3_hdr(skb, &addr_type);
-	if (!lyr3h)
-		goto out;
-
-	addr = ipvlan_addr_lookup(ipvlan->port, lyr3h, addr_type, true);
+	addr = ipvlan_get_slave_addr_dst(skb, ipvlan->port);
 	if (addr)
 		return ipvlan_rcv_int_frame(addr, &skb);
 
-out:
 	ipvlan_skb_crossing_ns(skb, ipvlan->phy_dev);
 	return ipvlan_process_outbound(skb);
 }
@@ -527,17 +487,12 @@ static int ipvlan_xmit_mode_l2(struct sk_buff *skb, struct net_device *dev)
 	const struct ipvl_dev *ipvlan = netdev_priv(dev);
 	struct ethhdr *eth = eth_hdr(skb);
 	struct ipvl_addr *addr;
-	void *lyr3h;
-	int addr_type;
 
 	if (ether_addr_equal(eth->h_dest, eth->h_source)) {
-		lyr3h = ipvlan_get_L3_hdr(skb, &addr_type);
-		if (lyr3h) {
-			addr = ipvlan_addr_lookup(ipvlan->port, lyr3h,
-						  addr_type, true);
-			if (addr)
-				return ipvlan_rcv_int_frame(addr, &skb);
-		}
+		addr = ipvlan_get_slave_addr_dst(skb, ipvlan->port);
+		if (addr)
+			return ipvlan_rcv_int_frame(addr, &skb);
+
 		skb = skb_share_check(skb, GFP_ATOMIC);
 		if (!skb)
 			return NET_XMIT_DROP;
@@ -586,24 +541,10 @@ int ipvlan_queue_xmit(struct sk_buff *skb, struct net_device *dev)
 	return NET_XMIT_DROP;
 }
 
-static bool ipvlan_external_frame(struct sk_buff *skb, struct ipvl_port *port)
+static inline bool ipvlan_external_frame(struct sk_buff *skb,
+					 struct ipvl_port *port)
 {
-	struct ethhdr *eth = eth_hdr(skb);
-	struct ipvl_addr *addr;
-	void *lyr3h;
-	int addr_type;
-
-	if (ether_addr_equal(eth->h_source, skb->dev->dev_addr)) {
-		lyr3h = ipvlan_get_L3_hdr(skb, &addr_type);
-		if (!lyr3h)
-			return true;
-
-		addr = ipvlan_addr_lookup(port, lyr3h, addr_type, false);
-		if (addr)
-			return false;
-	}
-
-	return true;
+	return !ipvlan_get_slave_addr(skb, port, false);
 }
 
 static int ipvlan_rcv_ext_frame(struct ipvl_addr *addr, struct sk_buff *skb)
@@ -621,22 +562,14 @@ static int ipvlan_rcv_ext_frame(struct ipvl_addr *addr, struct sk_buff *skb)
 static rx_handler_result_t ipvlan_handle_mode_l3(struct sk_buff **pskb,
 						 struct ipvl_port *port)
 {
-	void *lyr3h;
-	int addr_type;
 	struct ipvl_addr *addr;
 	struct sk_buff *skb = *pskb;
-	rx_handler_result_t ret = RX_HANDLER_PASS;
 
-	lyr3h = ipvlan_get_L3_hdr(skb, &addr_type);
-	if (!lyr3h)
-		goto out;
-
-	addr = ipvlan_addr_lookup(port, lyr3h, addr_type, true);
+	addr = ipvlan_get_slave_addr_dst(skb, port);
 	if (addr)
-		ret = ipvlan_rcv_ext_frame(addr, skb);
+		return ipvlan_rcv_ext_frame(addr, skb);
 
-out:
-	return ret;
+	return RX_HANDLER_PASS;
 }
 
 static rx_handler_result_t ipvlan_handle_mode_l2(struct sk_buff **pskb,
@@ -644,9 +577,6 @@ static rx_handler_result_t ipvlan_handle_mode_l2(struct sk_buff **pskb,
 {
 	struct sk_buff *skb = *pskb;
 	struct ethhdr *eth = eth_hdr(skb);
-	rx_handler_result_t ret = RX_HANDLER_PASS;
-	void *lyr3h;
-	int addr_type;
 
 	if (is_multicast_ether_addr(eth->h_dest)) {
 		if (ipvlan_external_frame(skb, port)) {
@@ -666,16 +596,12 @@ static rx_handler_result_t ipvlan_handle_mode_l2(struct sk_buff **pskb,
 	} else {
 		struct ipvl_addr *addr;
 
-		lyr3h = ipvlan_get_L3_hdr(skb, &addr_type);
-		if (!lyr3h)
-			return ret;
-
-		addr = ipvlan_addr_lookup(port, lyr3h, addr_type, true);
+		addr = ipvlan_get_slave_addr_dst(skb, port);
 		if (addr)
-			ret = ipvlan_rcv_ext_frame(addr, skb);
+			return ipvlan_rcv_ext_frame(addr, skb);
 	}
 
-	return ret;
+	return RX_HANDLER_PASS;
 }
 
 rx_handler_result_t ipvlan_handle_frame(struct sk_buff **pskb)
@@ -705,25 +631,16 @@ rx_handler_result_t ipvlan_handle_frame(struct sk_buff **pskb)
 static struct ipvl_addr *ipvlan_skb_to_addr(struct sk_buff *skb,
 					    struct net_device *dev)
 {
-	struct ipvl_addr *addr = NULL;
 	struct ipvl_port *port;
-	void *lyr3h;
-	int addr_type;
 
 	if (!dev || !netif_is_ipvlan_port(dev))
-		goto out;
+		return NULL;
 
 	port = ipvlan_port_get_rcu(dev);
 	if (!port || port->mode != IPVLAN_MODE_L3S)
-		goto out;
-
-	lyr3h = ipvlan_get_L3_hdr(skb, &addr_type);
-	if (!lyr3h)
-		goto out;
+		return NULL;
 
-	addr = ipvlan_addr_lookup(port, lyr3h, addr_type, true);
-out:
-	return addr;
+	return ipvlan_get_slave_addr_dst(skb, port);
 }
 
 struct sk_buff *ipvlan_l3_rcv(struct net_device *dev, struct sk_buff *skb,
-- 
2.9.3

--------------------------------------------------------------
Intel Research and Development Ireland Limited
Registered in Ireland
Registered Office: Collinstown Industrial Park, Leixlip, County Kildare
Registered Number: 308263


This e-mail and any attachments may contain confidential material for the sole
use of the intended recipient(s). Any review or distribution by others is
strictly prohibited. If you are not the intended recipient, please contact the
sender and delete all copies.

^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH net-next 5/9] ipvlan: improve and uniform naming
  2017-04-27 14:51 [PATCH net-next 0/9] support unique MAC addresses for slave devices Marco Chiappero
                   ` (3 preceding siblings ...)
  2017-04-27 14:51 ` [PATCH net-next 4/9] ipvlan: rework the IP lookup function Marco Chiappero
@ 2017-04-27 14:51 ` Marco Chiappero
  2017-04-27 14:51 ` [PATCH net-next 6/9] ipvlan: reposition three functions Marco Chiappero
                   ` (4 subsequent siblings)
  9 siblings, 0 replies; 27+ messages in thread
From: Marco Chiappero @ 2017-04-27 14:51 UTC (permalink / raw)
  To: netdev
  Cc: David S . Miller, Jeff Kirsher, Alexander Duyck, Sainath Grandhi,
	Mahesh Bandewar, Marco Chiappero

Rename a few functions to make them more descriptive. Also rename a
couple of variables to make them consistent with similar ones in other
functions.

Signed-off-by: Marco Chiappero <marco.chiappero@intel.com>
Tested-by: Marco Chiappero <marco.chiappero@intel.com>
---
 drivers/net/ipvlan/ipvlan_core.c | 29 ++++++++++++++---------------
 1 file changed, 14 insertions(+), 15 deletions(-)

diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c
index 876ce37..54c4e85 100644
--- a/drivers/net/ipvlan/ipvlan_core.c
+++ b/drivers/net/ipvlan/ipvlan_core.c
@@ -330,7 +330,7 @@ static int ipvlan_rcv_int_frame(struct ipvl_addr *addr, struct sk_buff **pskb)
 	return RX_HANDLER_CONSUMED;
 }
 
-static int ipvlan_process_v4_outbound(struct sk_buff *skb)
+static int ipvlan_process_l3_outbound_v4(struct sk_buff *skb)
 {
 	const struct iphdr *ip4h = ip_hdr(skb);
 	struct net_device *dev = skb->dev;
@@ -367,7 +367,7 @@ static int ipvlan_process_v4_outbound(struct sk_buff *skb)
 	return ret;
 }
 
-static int ipvlan_process_v6_outbound(struct sk_buff *skb)
+static int ipvlan_process_l3_outbound_v6(struct sk_buff *skb)
 {
 	const struct ipv6hdr *ip6h = ipv6_hdr(skb);
 	struct net_device *dev = skb->dev;
@@ -404,7 +404,7 @@ static int ipvlan_process_v6_outbound(struct sk_buff *skb)
 	return ret;
 }
 
-static int ipvlan_process_outbound(struct sk_buff *skb)
+static int ipvlan_process_l3_outbound(struct sk_buff *skb)
 {
 	struct ethhdr *ethh = eth_hdr(skb);
 	int ret = NET_XMIT_DROP;
@@ -428,9 +428,9 @@ static int ipvlan_process_outbound(struct sk_buff *skb)
 	}
 
 	if (skb->protocol == htons(ETH_P_IPV6)) {
-		ret = ipvlan_process_v6_outbound(skb);
+		ret = ipvlan_process_l3_outbound_v6(skb);
 	} else if (skb->protocol == htons(ETH_P_IP)) {
-		ret = ipvlan_process_v4_outbound(skb);
+		ret = ipvlan_process_l3_outbound_v4(skb);
 	} else {
 		pr_warn_ratelimited("Dropped outbound packet type=%x\n",
 				    ntohs(skb->protocol));
@@ -479,16 +479,16 @@ static int ipvlan_xmit_mode_l3(struct sk_buff *skb, struct net_device *dev)
 		return ipvlan_rcv_int_frame(addr, &skb);
 
 	ipvlan_skb_crossing_ns(skb, ipvlan->phy_dev);
-	return ipvlan_process_outbound(skb);
+	return ipvlan_process_l3_outbound(skb);
 }
 
 static int ipvlan_xmit_mode_l2(struct sk_buff *skb, struct net_device *dev)
 {
 	const struct ipvl_dev *ipvlan = netdev_priv(dev);
-	struct ethhdr *eth = eth_hdr(skb);
+	struct ethhdr *ethh = eth_hdr(skb);
 	struct ipvl_addr *addr;
 
-	if (ether_addr_equal(eth->h_dest, eth->h_source)) {
+	if (ether_addr_equal(ethh->h_dest, ethh->h_source)) {
 		addr = ipvlan_get_slave_addr_dst(skb, ipvlan->port);
 		if (addr)
 			return ipvlan_rcv_int_frame(addr, &skb);
@@ -504,7 +504,7 @@ static int ipvlan_xmit_mode_l2(struct sk_buff *skb, struct net_device *dev)
 		 */
 		return dev_forward_skb(ipvlan->phy_dev, skb);
 
-	} else if (is_multicast_ether_addr(eth->h_dest)) {
+	} else if (is_multicast_ether_addr(ethh->h_dest)) {
 		ipvlan_skb_crossing_ns(skb, NULL);
 		ipvlan_multicast_enqueue(ipvlan->port, skb, true);
 		return NET_XMIT_SUCCESS;
@@ -576,9 +576,8 @@ static rx_handler_result_t ipvlan_handle_mode_l2(struct sk_buff **pskb,
 						 struct ipvl_port *port)
 {
 	struct sk_buff *skb = *pskb;
-	struct ethhdr *eth = eth_hdr(skb);
 
-	if (is_multicast_ether_addr(eth->h_dest)) {
+	if (is_multicast_ether_addr(eth_hdr(skb)->h_dest)) {
 		if (ipvlan_external_frame(skb, port)) {
 			struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
 
@@ -628,8 +627,8 @@ rx_handler_result_t ipvlan_handle_frame(struct sk_buff **pskb)
 	return RX_HANDLER_CONSUMED;
 }
 
-static struct ipvl_addr *ipvlan_skb_to_addr(struct sk_buff *skb,
-					    struct net_device *dev)
+static struct ipvl_addr *ipvlan_skb_to_addr_l3s(struct sk_buff *skb,
+						struct net_device *dev)
 {
 	struct ipvl_port *port;
 
@@ -649,7 +648,7 @@ struct sk_buff *ipvlan_l3_rcv(struct net_device *dev, struct sk_buff *skb,
 	struct ipvl_addr *addr;
 	struct net_device *sdev;
 
-	addr = ipvlan_skb_to_addr(skb, dev);
+	addr = ipvlan_skb_to_addr_l3s(skb, dev);
 	if (!addr)
 		goto out;
 
@@ -699,7 +698,7 @@ unsigned int ipvlan_nf_input(void *priv, struct sk_buff *skb,
 	struct ipvl_addr *addr;
 	unsigned int len;
 
-	addr = ipvlan_skb_to_addr(skb, skb->dev);
+	addr = ipvlan_skb_to_addr_l3s(skb, skb->dev);
 	if (!addr)
 		goto out;
 
-- 
2.9.3

--------------------------------------------------------------
Intel Research and Development Ireland Limited
Registered in Ireland
Registered Office: Collinstown Industrial Park, Leixlip, County Kildare
Registered Number: 308263


This e-mail and any attachments may contain confidential material for the sole
use of the intended recipient(s). Any review or distribution by others is
strictly prohibited. If you are not the intended recipient, please contact the
sender and delete all copies.

^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH net-next 6/9] ipvlan: reposition three functions
  2017-04-27 14:51 [PATCH net-next 0/9] support unique MAC addresses for slave devices Marco Chiappero
                   ` (4 preceding siblings ...)
  2017-04-27 14:51 ` [PATCH net-next 5/9] ipvlan: improve and uniform naming Marco Chiappero
@ 2017-04-27 14:51 ` Marco Chiappero
  2017-04-27 14:51 ` [PATCH net-next 7/9] ipvlan: relocate ipvlan_skb_crossing_ns calls Marco Chiappero
                   ` (3 subsequent siblings)
  9 siblings, 0 replies; 27+ messages in thread
From: Marco Chiappero @ 2017-04-27 14:51 UTC (permalink / raw)
  To: netdev
  Cc: David S . Miller, Jeff Kirsher, Alexander Duyck, Sainath Grandhi,
	Mahesh Bandewar, Marco Chiappero

Move ipvlan_multicast_enqueue, ipvlan_skb_crossing_ns and
ipvlan_mac_hash close to their related siblings.

Signed-off-by: Marco Chiappero <marco.chiappero@intel.com>
---
 drivers/net/ipvlan/ipvlan_core.c | 80 ++++++++++++++++++++--------------------
 1 file changed, 40 insertions(+), 40 deletions(-)

diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c
index 54c4e85..81dc3e5 100644
--- a/drivers/net/ipvlan/ipvlan_core.c
+++ b/drivers/net/ipvlan/ipvlan_core.c
@@ -88,6 +88,14 @@ void ipvlan_ht_addr_del(struct ipvl_addr *addr)
 	hlist_del_init_rcu(&addr->hlnode);
 }
 
+unsigned int ipvlan_mac_hash(const unsigned char *addr)
+{
+	u32 hash = jhash_1word(__get_unaligned_cpu32(addr + 2),
+			       ipvlan_jhash_secret);
+
+	return hash & IPVLAN_MAC_FILTER_MASK;
+}
+
 struct ipvl_addr *ipvlan_find_addr(const struct ipvl_dev *ipvlan,
 				   const void *iaddr, bool is_v6)
 {
@@ -195,12 +203,16 @@ static inline struct ipvl_addr *ipvlan_get_slave_addr_dst(struct sk_buff *skb,
 	return ipvlan_get_slave_addr(skb, port, true);
 }
 
-unsigned int ipvlan_mac_hash(const unsigned char *addr)
+static void ipvlan_skb_crossing_ns(struct sk_buff *skb, struct net_device *dev)
 {
-	u32 hash = jhash_1word(__get_unaligned_cpu32(addr + 2),
-			       ipvlan_jhash_secret);
+	bool xnet = true;
 
-	return hash & IPVLAN_MAC_FILTER_MASK;
+	if (dev)
+		xnet = !net_eq(dev_net(skb->dev), dev_net(dev));
+
+	skb_scrub_packet(skb, xnet);
+	if (dev)
+		skb->dev = dev;
 }
 
 static void ipvlan_dispatch_multicast(struct ipvl_port *port,
@@ -286,16 +298,33 @@ void ipvlan_process_multicast(struct work_struct *work)
 	}
 }
 
-static void ipvlan_skb_crossing_ns(struct sk_buff *skb, struct net_device *dev)
+static void ipvlan_multicast_enqueue(struct ipvl_port *port,
+				     struct sk_buff *skb, bool tx_pkt)
 {
-	bool xnet = true;
+	if (skb->protocol == htons(ETH_P_PAUSE)) {
+		kfree_skb(skb);
+		return;
+	}
 
-	if (dev)
-		xnet = !net_eq(dev_net(skb->dev), dev_net(dev));
+	/* Record that the deferred packet is from TX or RX path. By
+	 * looking at mac-addresses on packet will lead to erronus decisions.
+	 * (This would be true for a loopback-mode on master device or a
+	 * hair-pin mode of the switch.)
+	 */
+	IPVL_SKB_CB(skb)->tx_pkt = tx_pkt;
 
-	skb_scrub_packet(skb, xnet);
-	if (dev)
-		skb->dev = dev;
+	spin_lock(&port->backlog.lock);
+	if (skb_queue_len(&port->backlog) < IPVLAN_QBACKLOG_LIMIT) {
+		if (skb->dev)
+			dev_hold(skb->dev);
+		__skb_queue_tail(&port->backlog, skb);
+		spin_unlock(&port->backlog.lock);
+		schedule_work(&port->wq);
+	} else {
+		spin_unlock(&port->backlog.lock);
+		atomic_long_inc(&skb->dev->rx_dropped);
+		kfree_skb(skb);
+	}
 }
 
 static int ipvlan_rcv_int_frame(struct ipvl_addr *addr, struct sk_buff **pskb)
@@ -440,35 +469,6 @@ static int ipvlan_process_l3_outbound(struct sk_buff *skb)
 	return ret;
 }
 
-static void ipvlan_multicast_enqueue(struct ipvl_port *port,
-				     struct sk_buff *skb, bool tx_pkt)
-{
-	if (skb->protocol == htons(ETH_P_PAUSE)) {
-		kfree_skb(skb);
-		return;
-	}
-
-	/* Record that the deferred packet is from TX or RX path. By
-	 * looking at mac-addresses on packet will lead to erronus decisions.
-	 * (This would be true for a loopback-mode on master device or a
-	 * hair-pin mode of the switch.)
-	 */
-	IPVL_SKB_CB(skb)->tx_pkt = tx_pkt;
-
-	spin_lock(&port->backlog.lock);
-	if (skb_queue_len(&port->backlog) < IPVLAN_QBACKLOG_LIMIT) {
-		if (skb->dev)
-			dev_hold(skb->dev);
-		__skb_queue_tail(&port->backlog, skb);
-		spin_unlock(&port->backlog.lock);
-		schedule_work(&port->wq);
-	} else {
-		spin_unlock(&port->backlog.lock);
-		atomic_long_inc(&skb->dev->rx_dropped);
-		kfree_skb(skb);
-	}
-}
-
 static int ipvlan_xmit_mode_l3(struct sk_buff *skb, struct net_device *dev)
 {
 	const struct ipvl_dev *ipvlan = netdev_priv(dev);
-- 
2.9.3

--------------------------------------------------------------
Intel Research and Development Ireland Limited
Registered in Ireland
Registered Office: Collinstown Industrial Park, Leixlip, County Kildare
Registered Number: 308263


This e-mail and any attachments may contain confidential material for the sole
use of the intended recipient(s). Any review or distribution by others is
strictly prohibited. If you are not the intended recipient, please contact the
sender and delete all copies.

^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH net-next 7/9] ipvlan: relocate ipvlan_skb_crossing_ns calls
  2017-04-27 14:51 [PATCH net-next 0/9] support unique MAC addresses for slave devices Marco Chiappero
                   ` (5 preceding siblings ...)
  2017-04-27 14:51 ` [PATCH net-next 6/9] ipvlan: reposition three functions Marco Chiappero
@ 2017-04-27 14:51 ` Marco Chiappero
  2017-04-27 14:51 ` [PATCH net-next 8/9] ipvlan: improve compiler hints Marco Chiappero
                   ` (2 subsequent siblings)
  9 siblings, 0 replies; 27+ messages in thread
From: Marco Chiappero @ 2017-04-27 14:51 UTC (permalink / raw)
  To: netdev
  Cc: David S . Miller, Jeff Kirsher, Alexander Duyck, Sainath Grandhi,
	Mahesh Bandewar, Marco Chiappero

Relocate ipvlan_skb_crossing_ns invocations across the code to reduce
duplications and improve readability. Additionally introduce
ipvlan_process_l2_outbound for the purpose, similarly to
ipvlan_process_l3_outbound.

Signed-off-by: Marco Chiappero <marco.chiappero@intel.com>
---
 drivers/net/ipvlan/ipvlan_core.c | 25 ++++++++++++++++---------
 1 file changed, 16 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c
index 81dc3e5..a9fc1b5 100644
--- a/drivers/net/ipvlan/ipvlan_core.c
+++ b/drivers/net/ipvlan/ipvlan_core.c
@@ -289,6 +289,8 @@ void ipvlan_process_multicast(struct work_struct *work)
 		ethh = eth_hdr(skb);
 		mac_hash = ipvlan_mac_hash(ethh->h_dest);
 
+		ipvlan_skb_crossing_ns(skb, NULL);
+
 		if (ether_addr_equal(ethh->h_dest, port->dev->broadcast))
 			pkt_type = PACKET_BROADCAST;
 		else
@@ -433,7 +435,8 @@ static int ipvlan_process_l3_outbound_v6(struct sk_buff *skb)
 	return ret;
 }
 
-static int ipvlan_process_l3_outbound(struct sk_buff *skb)
+static int ipvlan_process_l3_outbound(struct sk_buff *skb,
+				      struct net_device *dev)
 {
 	struct ethhdr *ethh = eth_hdr(skb);
 	int ret = NET_XMIT_DROP;
@@ -446,6 +449,8 @@ static int ipvlan_process_l3_outbound(struct sk_buff *skb)
 		goto out;
 	}
 
+	ipvlan_skb_crossing_ns(skb, dev);
+
 	/* The ipvlan is a pseudo-L2 device, so the packets that we receive
 	 * will have L2; which need to discarded and processed further
 	 * in the net-ns of the main-device.
@@ -478,8 +483,14 @@ static int ipvlan_xmit_mode_l3(struct sk_buff *skb, struct net_device *dev)
 	if (addr)
 		return ipvlan_rcv_int_frame(addr, &skb);
 
-	ipvlan_skb_crossing_ns(skb, ipvlan->phy_dev);
-	return ipvlan_process_l3_outbound(skb);
+	return ipvlan_process_l3_outbound(skb, ipvlan->phy_dev);
+}
+
+static inline int ipvlan_process_l2_outbound(struct sk_buff *skb,
+					     struct net_device *dev)
+{
+	ipvlan_skb_crossing_ns(skb, dev);
+	return dev_queue_xmit(skb);
 }
 
 static int ipvlan_xmit_mode_l2(struct sk_buff *skb, struct net_device *dev)
@@ -505,13 +516,11 @@ static int ipvlan_xmit_mode_l2(struct sk_buff *skb, struct net_device *dev)
 		return dev_forward_skb(ipvlan->phy_dev, skb);
 
 	} else if (is_multicast_ether_addr(ethh->h_dest)) {
-		ipvlan_skb_crossing_ns(skb, NULL);
 		ipvlan_multicast_enqueue(ipvlan->port, skb, true);
 		return NET_XMIT_SUCCESS;
 	}
 
-	ipvlan_skb_crossing_ns(skb, ipvlan->phy_dev);
-	return dev_queue_xmit(skb);
+	return ipvlan_process_l2_outbound(skb, ipvlan->phy_dev);
 }
 
 int ipvlan_queue_xmit(struct sk_buff *skb, struct net_device *dev)
@@ -587,10 +596,8 @@ static rx_handler_result_t ipvlan_handle_mode_l2(struct sk_buff **pskb,
 			 * when work-queue processes this frame. This is
 			 * achieved by returning RX_HANDLER_PASS.
 			 */
-			if (nskb) {
-				ipvlan_skb_crossing_ns(nskb, NULL);
+			if (nskb)
 				ipvlan_multicast_enqueue(port, nskb, false);
-			}
 		}
 	} else {
 		struct ipvl_addr *addr;
-- 
2.9.3

--------------------------------------------------------------
Intel Research and Development Ireland Limited
Registered in Ireland
Registered Office: Collinstown Industrial Park, Leixlip, County Kildare
Registered Number: 308263


This e-mail and any attachments may contain confidential material for the sole
use of the intended recipient(s). Any review or distribution by others is
strictly prohibited. If you are not the intended recipient, please contact the
sender and delete all copies.

^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH net-next 8/9] ipvlan: improve compiler hints
  2017-04-27 14:51 [PATCH net-next 0/9] support unique MAC addresses for slave devices Marco Chiappero
                   ` (6 preceding siblings ...)
  2017-04-27 14:51 ` [PATCH net-next 7/9] ipvlan: relocate ipvlan_skb_crossing_ns calls Marco Chiappero
@ 2017-04-27 14:51 ` Marco Chiappero
  2017-04-27 15:21   ` Duyck, Alexander H
  2017-04-27 14:51 ` [PATCH net-next 9/9] ipvlan: introduce individual MAC addresses Marco Chiappero
  2017-04-27 19:21 ` [PATCH net-next 0/9] support unique MAC addresses for slave devices Mahesh Bandewar (महेश बंडेवार)
  9 siblings, 1 reply; 27+ messages in thread
From: Marco Chiappero @ 2017-04-27 14:51 UTC (permalink / raw)
  To: netdev
  Cc: David S . Miller, Jeff Kirsher, Alexander Duyck, Sainath Grandhi,
	Mahesh Bandewar, Marco Chiappero

Extend inlining and branch prediction hints.

Signed-off-by: Marco Chiappero <marco.chiappero@intel.com>
---
 drivers/net/ipvlan/ipvlan_core.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c
index a9fc1b5..67e342d 100644
--- a/drivers/net/ipvlan/ipvlan_core.c
+++ b/drivers/net/ipvlan/ipvlan_core.c
@@ -88,7 +88,7 @@ void ipvlan_ht_addr_del(struct ipvl_addr *addr)
 	hlist_del_init_rcu(&addr->hlnode);
 }
 
-unsigned int ipvlan_mac_hash(const unsigned char *addr)
+inline unsigned int ipvlan_mac_hash(const unsigned char *addr)
 {
 	u32 hash = jhash_1word(__get_unaligned_cpu32(addr + 2),
 			       ipvlan_jhash_secret);
@@ -505,7 +505,7 @@ static int ipvlan_xmit_mode_l2(struct sk_buff *skb, struct net_device *dev)
 			return ipvlan_rcv_int_frame(addr, &skb);
 
 		skb = skb_share_check(skb, GFP_ATOMIC);
-		if (!skb)
+		if (unlikely(!skb))
 			return NET_XMIT_DROP;
 
 		/* Packet definitely does not belong to any of the
@@ -596,7 +596,7 @@ static rx_handler_result_t ipvlan_handle_mode_l2(struct sk_buff **pskb,
 			 * when work-queue processes this frame. This is
 			 * achieved by returning RX_HANDLER_PASS.
 			 */
-			if (nskb)
+			if (likely(nskb))
 				ipvlan_multicast_enqueue(port, nskb, false);
 		}
 	} else {
-- 
2.9.3

--------------------------------------------------------------
Intel Research and Development Ireland Limited
Registered in Ireland
Registered Office: Collinstown Industrial Park, Leixlip, County Kildare
Registered Number: 308263


This e-mail and any attachments may contain confidential material for the sole
use of the intended recipient(s). Any review or distribution by others is
strictly prohibited. If you are not the intended recipient, please contact the
sender and delete all copies.

^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH net-next 9/9] ipvlan: introduce individual MAC addresses
  2017-04-27 14:51 [PATCH net-next 0/9] support unique MAC addresses for slave devices Marco Chiappero
                   ` (7 preceding siblings ...)
  2017-04-27 14:51 ` [PATCH net-next 8/9] ipvlan: improve compiler hints Marco Chiappero
@ 2017-04-27 14:51 ` Marco Chiappero
  2017-04-27 16:20   ` Dan Williams
  2017-04-27 20:30   ` kbuild test robot
  2017-04-27 19:21 ` [PATCH net-next 0/9] support unique MAC addresses for slave devices Mahesh Bandewar (महेश बंडेवार)
  9 siblings, 2 replies; 27+ messages in thread
From: Marco Chiappero @ 2017-04-27 14:51 UTC (permalink / raw)
  To: netdev
  Cc: David S . Miller, Jeff Kirsher, Alexander Duyck, Sainath Grandhi,
	Mahesh Bandewar, Marco Chiappero

Currently all the slave devices belonging to the same port inherit their
MAC address from its master device. This patch removes this limitation
and allows every slave device to obtain a unique MAC address, by default
randomly generated at creation time.

Moreover it is now possible to correctly modify the MAC address at any
time, fixing an existing bug as MAC address changes on the master were
not reflected on the slaves. It also avoids multiple interfaces sharing
the same IPv6 link-local address.

Since ipvlan is designed to expose a single MAC address for external
communications, the driver now behaves as follow:
- L2 mode:
   * Any reference to the internal MAC address of the originating slave
     is replaced with the MAC address of the master for outbound frames.
   * Likewise, the destination MAC address is overwritten with the
     internal one (once the correct slave is determined) for any
     incoming external frame.
   * For any internal slave-to-slave communication, the original MAC
     addresses are preserved (although not used for routing/switching).
- L3/L3s mode:
   * The destination MAC address for incoming external packets is
     replaced with the one belonging to the destination slave device
     (as for L2 mode)
   * Every other path behaves as before.

Being a significant behavioral change, version number has been
increased.

Signed-off-by: Marco Chiappero <marco.chiappero@intel.com>
Tested-by: Marco Chiappero <marco.chiappero@intel.com>
---
 drivers/net/ipvlan/ipvlan.h      |   2 +-
 drivers/net/ipvlan/ipvlan_core.c | 113 ++++++++++++++++++++++++++++++++++-----
 drivers/net/ipvlan/ipvlan_main.c |  18 +++----
 3 files changed, 111 insertions(+), 22 deletions(-)

diff --git a/drivers/net/ipvlan/ipvlan.h b/drivers/net/ipvlan/ipvlan.h
index 800a46c..efe4fd1 100644
--- a/drivers/net/ipvlan/ipvlan.h
+++ b/drivers/net/ipvlan/ipvlan.h
@@ -32,7 +32,7 @@
 #include <net/l3mdev.h>
 
 #define IPVLAN_DRV	"ipvlan"
-#define IPV_DRV_VER	"0.1"
+#define IPV_DRV_VER	"0.2"
 
 #define IPVLAN_HASH_SIZE	(1 << BITS_PER_BYTE)
 #define IPVLAN_HASH_MASK	(IPVLAN_HASH_SIZE - 1)
diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c
index 67e342d..a30bc11 100644
--- a/drivers/net/ipvlan/ipvlan_core.c
+++ b/drivers/net/ipvlan/ipvlan_core.c
@@ -215,6 +215,89 @@ static void ipvlan_skb_crossing_ns(struct sk_buff *skb, struct net_device *dev)
 		skb->dev = dev;
 }
 
+static inline struct nd_opt_hdr *ipvlan_icmp6_nd_opts(struct icmp6hdr *icmph)
+{
+	return (struct nd_opt_hdr *)((struct nd_msg *)icmph)->opt;
+}
+
+static inline struct nd_opt_hdr *ipvlan_icmp6_rs_opts(struct icmp6hdr *icmph)
+{
+	return (struct nd_opt_hdr *)((struct rs_msg *)icmph)->opt;
+}
+
+static void ipvlan_proxy_l2_update_icmp6(const struct net_device *master,
+					 struct sk_buff *skb,
+					 struct nd_opt_hdr *nd_opt,
+					 u8 opt_type)
+{
+	u32 opts_len = skb_tail_pointer(skb) - (u8 *)nd_opt;
+
+	while (opts_len) {
+		u32 opt_len = nd_opt->nd_opt_len << 3;
+
+		if (nd_opt->nd_opt_type == opt_type) {
+			struct ipv6hdr *ip6h = ipv6_hdr(skb);
+			struct icmp6hdr *icmph = icmp6_hdr(skb);
+			u32 len = ntohs(ip6h->payload_len);
+
+			memcpy(nd_opt + 1, master->dev_addr, master->addr_len);
+			icmph->icmp6_cksum = 0;
+			icmph->icmp6_cksum =
+				csum_ipv6_magic(&ip6h->saddr,
+						&ip6h->daddr, len,
+						IPPROTO_ICMPV6,
+						csum_partial(icmph, len, 0));
+			return;
+		}
+
+		opts_len -= opt_len;
+		nd_opt = ((void *)nd_opt) + opt_len;
+	}
+}
+
+static void ipvlan_proxy_l2_outbound(struct sk_buff *skb,
+				     const struct net_device *master)
+{
+	/* masquerade the source MAC address for every outgoing frame */
+	memcpy(eth_hdr(skb)->h_source, master->dev_addr, master->addr_len);
+
+	/* ARP and some NDISC packets need additional treatment */
+	if (skb->protocol == htons(ETH_P_IPV6)) {
+		struct ipv6hdr *ip6h = ipv6_hdr(skb);
+		struct icmp6hdr *icmph = icmp6_hdr(skb);
+		struct nd_opt_hdr *nd_opt;
+		u8 opt_type;
+
+		if (likely(ip6h->nexthdr != NEXTHDR_ICMP))
+			return;
+
+		switch (icmph->icmp6_type) {
+		case NDISC_NEIGHBOUR_SOLICITATION: {
+			nd_opt = ipvlan_icmp6_nd_opts(icmph);
+			opt_type = ND_OPT_SOURCE_LL_ADDR;
+			break;
+		}
+		case NDISC_NEIGHBOUR_ADVERTISEMENT: {
+			nd_opt = ipvlan_icmp6_nd_opts(icmph);
+			opt_type = ND_OPT_TARGET_LL_ADDR;
+			break;
+		}
+		case NDISC_ROUTER_SOLICITATION: {
+			nd_opt = ipvlan_icmp6_rs_opts(icmph);
+			opt_type = ND_OPT_SOURCE_LL_ADDR;
+			break;
+		}
+		default:
+			return;
+		}
+
+		ipvlan_proxy_l2_update_icmp6(master, skb, nd_opt, opt_type);
+
+	} else if (unlikely(skb->protocol == htons(ETH_P_ARP))) {
+		memcpy(arp_hdr(skb) + 1, master->dev_addr, master->addr_len);
+	}
+}
+
 static void ipvlan_dispatch_multicast(struct ipvl_port *port,
 				      struct sk_buff *skb, u8 pkt_type,
 				      unsigned int mac_hash)
@@ -258,6 +341,7 @@ static void ipvlan_dispatch_multicast(struct ipvl_port *port,
 		/* If the packet originated here, send it out. */
 		skb->dev = port->dev;
 		skb->pkt_type = pkt_type;
+		ipvlan_proxy_l2_outbound(skb, port->dev);
 		dev_queue_xmit(skb);
 	} else {
 		if (consumed)
@@ -489,6 +573,7 @@ static int ipvlan_xmit_mode_l3(struct sk_buff *skb, struct net_device *dev)
 static inline int ipvlan_process_l2_outbound(struct sk_buff *skb,
 					     struct net_device *dev)
 {
+	ipvlan_proxy_l2_outbound(skb, dev);
 	ipvlan_skb_crossing_ns(skb, dev);
 	return dev_queue_xmit(skb);
 }
@@ -499,27 +584,27 @@ static int ipvlan_xmit_mode_l2(struct sk_buff *skb, struct net_device *dev)
 	struct ethhdr *ethh = eth_hdr(skb);
 	struct ipvl_addr *addr;
 
-	if (ether_addr_equal(ethh->h_dest, ethh->h_source)) {
-		addr = ipvlan_get_slave_addr_dst(skb, ipvlan->port);
-		if (addr)
-			return ipvlan_rcv_int_frame(addr, &skb);
+	if (is_multicast_ether_addr(ethh->h_dest)) {
+		ipvlan_multicast_enqueue(ipvlan->port, skb, true);
+		return NET_XMIT_SUCCESS;
+	}
 
+	if (ether_addr_equal(ethh->h_dest, ipvlan->phy_dev->dev_addr)) {
 		skb = skb_share_check(skb, GFP_ATOMIC);
 		if (unlikely(!skb))
 			return NET_XMIT_DROP;
 
-		/* Packet definitely does not belong to any of the
-		 * virtual devices, but the dest is local. So forward
-		 * the skb for the main-dev. At the RX side we just return
-		 * RX_PASS for it to be processed further on the stack.
+		/* Forward the skb for the master device. At the RX side we
+		 * just return RX_HANDLER_PASS for it to be processed further
+		 * on the stack.
 		 */
 		return dev_forward_skb(ipvlan->phy_dev, skb);
-
-	} else if (is_multicast_ether_addr(ethh->h_dest)) {
-		ipvlan_multicast_enqueue(ipvlan->port, skb, true);
-		return NET_XMIT_SUCCESS;
 	}
 
+	addr = ipvlan_get_slave_addr_dst(skb, ipvlan->port);
+	if (addr)
+		return ipvlan_rcv_int_frame(addr, &skb);
+
 	return ipvlan_process_l2_outbound(skb, ipvlan->phy_dev);
 }
 
@@ -562,6 +647,10 @@ static int ipvlan_rcv_ext_frame(struct ipvl_addr *addr, struct sk_buff *skb)
 	struct net_device *dev = ipvlan->dev;
 	unsigned int len = skb->len + ETH_HLEN;
 
+	/* NOTE: although not necessary restore the actual destination
+	 * address; this is also what traffic sniffers will display.
+	 */
+	memcpy(eth_hdr(skb)->h_dest, dev->dev_addr, dev->addr_len);
 	ipvlan_skb_crossing_ns(skb, dev);
 	ipvlan_count_rx(ipvlan, len, true, false);
 
diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c
index b837807..709f27d 100644
--- a/drivers/net/ipvlan/ipvlan_main.c
+++ b/drivers/net/ipvlan/ipvlan_main.c
@@ -378,6 +378,7 @@ static const struct net_device_ops ipvlan_netdev_ops = {
 	.ndo_start_xmit		= ipvlan_start_xmit,
 	.ndo_fix_features	= ipvlan_fix_features,
 	.ndo_change_rx_flags	= ipvlan_change_rx_flags,
+	.ndo_set_mac_address	= eth_mac_addr,
 	.ndo_set_rx_mode	= ipvlan_set_multicast_mac_filter,
 	.ndo_get_stats64	= ipvlan_get_stats64,
 	.ndo_vlan_rx_add_vid	= ipvlan_vlan_rx_add_vid,
@@ -392,9 +393,10 @@ static int ipvlan_hard_header(struct sk_buff *skb, struct net_device *dev,
 	const struct ipvl_dev *ipvlan = netdev_priv(dev);
 	struct net_device *phy_dev = ipvlan->phy_dev;
 
-	/* TODO Probably use a different field than dev_addr so that the
-	 * mac-address on the virtual device is portable and can be carried
-	 * while the packets use the mac-addr on the physical device.
+	/* This driver uses (almost exclusively) L3 addresses for
+	 * routing/switching. Use the actual slave's MAC address,
+	 * but overwrite it later during the packet processing for
+	 * frames leaving from master
 	 */
 	return dev_hard_header(skb, phy_dev, type, daddr,
 			       saddr ? : dev->dev_addr, len);
@@ -559,11 +561,8 @@ int ipvlan_link_new(struct net *src_net, struct net_device *dev,
 	/* Increment id-base to the next slot for the future assignment */
 	port->dev_id_start = err + 1;
 
-	/* TODO Probably put random address here to be presented to the
-	 * world but keep using the physical-dev address for the outgoing
-	 * packets.
-	 */
-	memcpy(dev->dev_addr, phy_dev->dev_addr, ETH_ALEN);
+	/* TODO: consider storing the original MAC address in dev->perm_addr */
+	eth_hw_addr_random(dev);
 
 	dev->priv_flags |= IFF_IPVLAN_SLAVE;
 
@@ -619,7 +618,8 @@ void ipvlan_link_setup(struct net_device *dev)
 	ether_setup(dev);
 
 	dev->priv_flags &= ~(IFF_XMIT_DST_RELEASE | IFF_TX_SKB_SHARING);
-	dev->priv_flags |= IFF_UNICAST_FLT | IFF_NO_QUEUE;
+	dev->priv_flags |= IFF_UNICAST_FLT | IFF_NO_QUEUE
+			   | IFF_LIVE_ADDR_CHANGE;
 	dev->netdev_ops = &ipvlan_netdev_ops;
 	dev->destructor = free_netdev;
 	dev->header_ops = &ipvlan_header_ops;
-- 
2.9.3

--------------------------------------------------------------
Intel Research and Development Ireland Limited
Registered in Ireland
Registered Office: Collinstown Industrial Park, Leixlip, County Kildare
Registered Number: 308263


This e-mail and any attachments may contain confidential material for the sole
use of the intended recipient(s). Any review or distribution by others is
strictly prohibited. If you are not the intended recipient, please contact the
sender and delete all copies.

^ permalink raw reply related	[flat|nested] 27+ messages in thread

* RE: [PATCH net-next 8/9] ipvlan: improve compiler hints
  2017-04-27 14:51 ` [PATCH net-next 8/9] ipvlan: improve compiler hints Marco Chiappero
@ 2017-04-27 15:21   ` Duyck, Alexander H
  2017-04-27 15:27     ` David Laight
  2017-04-27 16:05     ` David Miller
  0 siblings, 2 replies; 27+ messages in thread
From: Duyck, Alexander H @ 2017-04-27 15:21 UTC (permalink / raw)
  To: Chiappero, Marco, netdev
  Cc: David S . Miller, Kirsher, Jeffrey T, Grandhi, Sainath, Mahesh Bandewar

> -----Original Message-----
> From: Chiappero, Marco
> Sent: Thursday, April 27, 2017 7:52 AM
> To: netdev@vger.kernel.org
> Cc: David S . Miller <davem@davemloft.net>; Kirsher, Jeffrey T
> <jeffrey.t.kirsher@intel.com>; Duyck, Alexander H
> <alexander.h.duyck@intel.com>; Grandhi, Sainath
> <sainath.grandhi@intel.com>; Mahesh Bandewar <maheshb@google.com>;
> Chiappero, Marco <marco.chiappero@intel.com>
> Subject: [PATCH net-next 8/9] ipvlan: improve compiler hints
> 
> Extend inlining and branch prediction hints.
> 
> Signed-off-by: Marco Chiappero <marco.chiappero@intel.com>
> ---
>  drivers/net/ipvlan/ipvlan_core.c | 6 +++---
>  1 file changed, 3 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c
> index a9fc1b5..67e342d 100644
> --- a/drivers/net/ipvlan/ipvlan_core.c
> +++ b/drivers/net/ipvlan/ipvlan_core.c
> @@ -88,7 +88,7 @@ void ipvlan_ht_addr_del(struct ipvl_addr *addr)
>  	hlist_del_init_rcu(&addr->hlnode);
>  }
> 
> -unsigned int ipvlan_mac_hash(const unsigned char *addr)
> +inline unsigned int ipvlan_mac_hash(const unsigned char *addr)
>  {
>  	u32 hash = jhash_1word(__get_unaligned_cpu32(addr + 2),
>  			       ipvlan_jhash_secret);

I'm kind of surprised this isn't causing a problem with differing declarations between the declaration here and the declaration in ipvlan.h. Normally for inlining something like this you would change it to a "static inline" and move the entire declaration into the header file.

> @@ -505,7 +505,7 @@ static int ipvlan_xmit_mode_l2(struct sk_buff *skb,
> struct net_device *dev)
>  			return ipvlan_rcv_int_frame(addr, &skb);
> 
>  		skb = skb_share_check(skb, GFP_ATOMIC);
> -		if (!skb)
> +		if (unlikely(!skb))
>  			return NET_XMIT_DROP;
> 
>  		/* Packet definitely does not belong to any of the @@ -596,7
> +596,7 @@ static rx_handler_result_t ipvlan_handle_mode_l2(struct sk_buff
> **pskb,
>  			 * when work-queue processes this frame. This is
>  			 * achieved by returning RX_HANDLER_PASS.
>  			 */
> -			if (nskb)
> +			if (likely(nskb))
>  				ipvlan_multicast_enqueue(port, nskb, false);
>  		}
>  	} else {
> --
> 2.9.3

^ permalink raw reply	[flat|nested] 27+ messages in thread

* RE: [PATCH net-next 8/9] ipvlan: improve compiler hints
  2017-04-27 15:21   ` Duyck, Alexander H
@ 2017-04-27 15:27     ` David Laight
  2017-04-27 16:05     ` David Miller
  1 sibling, 0 replies; 27+ messages in thread
From: David Laight @ 2017-04-27 15:27 UTC (permalink / raw)
  To: 'Duyck, Alexander H', Chiappero, Marco, netdev
  Cc: David S . Miller, Kirsher, Jeffrey T, Grandhi, Sainath, Mahesh Bandewar

From: Duyck, Alexander H
> Sent: 27 April 2017 16:21
...
> > -unsigned int ipvlan_mac_hash(const unsigned char *addr)
> > +inline unsigned int ipvlan_mac_hash(const unsigned char *addr)
> >  {
> >  	u32 hash = jhash_1word(__get_unaligned_cpu32(addr + 2),
> >  			       ipvlan_jhash_secret);
> 
> I'm kind of surprised this isn't causing a problem with differing declarations between the declaration
> here and the declaration in ipvlan.h. Normally for inlining something like this you would change it to
> a "static inline" and move the entire declaration into the header file.

You get a callable copy for external callers and local calls inlined.
Not usually what you want.

	David

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH net-next 8/9] ipvlan: improve compiler hints
  2017-04-27 15:21   ` Duyck, Alexander H
  2017-04-27 15:27     ` David Laight
@ 2017-04-27 16:05     ` David Miller
  1 sibling, 0 replies; 27+ messages in thread
From: David Miller @ 2017-04-27 16:05 UTC (permalink / raw)
  To: alexander.h.duyck
  Cc: marco.chiappero, netdev, jeffrey.t.kirsher, sainath.grandhi, maheshb

From: "Duyck, Alexander H" <alexander.h.duyck@intel.com>
Date: Thu, 27 Apr 2017 15:21:16 +0000

>> -----Original Message-----
>> From: Chiappero, Marco
>> Sent: Thursday, April 27, 2017 7:52 AM
>> To: netdev@vger.kernel.org
>> Cc: David S . Miller <davem@davemloft.net>; Kirsher, Jeffrey T
>> <jeffrey.t.kirsher@intel.com>; Duyck, Alexander H
>> <alexander.h.duyck@intel.com>; Grandhi, Sainath
>> <sainath.grandhi@intel.com>; Mahesh Bandewar <maheshb@google.com>;
>> Chiappero, Marco <marco.chiappero@intel.com>
>> Subject: [PATCH net-next 8/9] ipvlan: improve compiler hints
>> 
>> Extend inlining and branch prediction hints.
>> 
>> Signed-off-by: Marco Chiappero <marco.chiappero@intel.com>
>> ---
>>  drivers/net/ipvlan/ipvlan_core.c | 6 +++---
>>  1 file changed, 3 insertions(+), 3 deletions(-)
>> 
>> diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c
>> index a9fc1b5..67e342d 100644
>> --- a/drivers/net/ipvlan/ipvlan_core.c
>> +++ b/drivers/net/ipvlan/ipvlan_core.c
>> @@ -88,7 +88,7 @@ void ipvlan_ht_addr_del(struct ipvl_addr *addr)
>>  	hlist_del_init_rcu(&addr->hlnode);
>>  }
>> 
>> -unsigned int ipvlan_mac_hash(const unsigned char *addr)
>> +inline unsigned int ipvlan_mac_hash(const unsigned char *addr)
>>  {
>>  	u32 hash = jhash_1word(__get_unaligned_cpu32(addr + 2),
>>  			       ipvlan_jhash_secret);
> 
> I'm kind of surprised this isn't causing a problem with differing
> declarations between the declaration here and the declaration in
> ipvlan.h. Normally for inlining something like this you would change
> it to a "static inline" and move the entire declaration into the
> header file.

No inlines in foo.c files please, seriously let the compiler decide
it knows better than you.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH net-next 9/9] ipvlan: introduce individual MAC addresses
  2017-04-27 14:51 ` [PATCH net-next 9/9] ipvlan: introduce individual MAC addresses Marco Chiappero
@ 2017-04-27 16:20   ` Dan Williams
  2017-04-27 16:23     ` Dan Williams
  2017-04-27 20:30   ` kbuild test robot
  1 sibling, 1 reply; 27+ messages in thread
From: Dan Williams @ 2017-04-27 16:20 UTC (permalink / raw)
  To: Marco Chiappero, netdev
  Cc: David S . Miller, Jeff Kirsher, Alexander Duyck, Sainath Grandhi,
	Mahesh Bandewar

On Thu, 2017-04-27 at 15:51 +0100, Marco Chiappero wrote:
> Currently all the slave devices belonging to the same port inherit
> their
> MAC address from its master device. This patch removes this
> limitation
> and allows every slave device to obtain a unique MAC address, by
> default
> randomly generated at creation time.
> 
> Moreover it is now possible to correctly modify the MAC address at
> any
> time, fixing an existing bug as MAC address changes on the master
> were
> not reflected on the slaves. It also avoids multiple interfaces
> sharing
> the same IPv6 link-local address.

How is this different than macvlan now?  Why would you use unique
addressed ipvlan instances instead of macvlan?  Wouldn't the same
problems around external switches not expecting multiple MACs from the
same switch port apply now to ipvlan?

The whole point of ipvlan AIUI was to get around macvlan problems
related to multiple MACs on the same port.

Also, I think the IPv6 thing you mention is incorrect and has long
since been solved.  Originally, ipvlan did not include a "dev_id"
property that differened between child interfaces, and thus the IID of
the each interface was the same.  That has now been fixed, and each
ipvlan slave should now have a different IID and thus a different link-
local address.

Dan

> Since ipvlan is designed to expose a single MAC address for external
> communications, the driver now behaves as follow:
> - L2 mode:
>    * Any reference to the internal MAC address of the originating
> slave
>      is replaced with the MAC address of the master for outbound
> frames.
>    * Likewise, the destination MAC address is overwritten with the
>      internal one (once the correct slave is determined) for any
>      incoming external frame.
>    * For any internal slave-to-slave communication, the original MAC
>      addresses are preserved (although not used for
> routing/switching).
> - L3/L3s mode:
>    * The destination MAC address for incoming external packets is
>      replaced with the one belonging to the destination slave device
>      (as for L2 mode)
>    * Every other path behaves as before.
> 
> Being a significant behavioral change, version number has been
> increased.
> 
> Signed-off-by: Marco Chiappero <marco.chiappero@intel.com>
> Tested-by: Marco Chiappero <marco.chiappero@intel.com>
> ---
>  drivers/net/ipvlan/ipvlan.h      |   2 +-
>  drivers/net/ipvlan/ipvlan_core.c | 113
> ++++++++++++++++++++++++++++++++++-----
>  drivers/net/ipvlan/ipvlan_main.c |  18 +++----
>  3 files changed, 111 insertions(+), 22 deletions(-)
> 
> diff --git a/drivers/net/ipvlan/ipvlan.h
> b/drivers/net/ipvlan/ipvlan.h
> index 800a46c..efe4fd1 100644
> --- a/drivers/net/ipvlan/ipvlan.h
> +++ b/drivers/net/ipvlan/ipvlan.h
> @@ -32,7 +32,7 @@
>  #include <net/l3mdev.h>
>  
>  #define IPVLAN_DRV	"ipvlan"
> -#define IPV_DRV_VER	"0.1"
> +#define IPV_DRV_VER	"0.2"
>  
>  #define IPVLAN_HASH_SIZE	(1 << BITS_PER_BYTE)
>  #define IPVLAN_HASH_MASK	(IPVLAN_HASH_SIZE - 1)
> diff --git a/drivers/net/ipvlan/ipvlan_core.c
> b/drivers/net/ipvlan/ipvlan_core.c
> index 67e342d..a30bc11 100644
> --- a/drivers/net/ipvlan/ipvlan_core.c
> +++ b/drivers/net/ipvlan/ipvlan_core.c
> @@ -215,6 +215,89 @@ static void ipvlan_skb_crossing_ns(struct
> sk_buff *skb, struct net_device *dev)
>  		skb->dev = dev;
>  }
>  
> +static inline struct nd_opt_hdr *ipvlan_icmp6_nd_opts(struct
> icmp6hdr *icmph)
> +{
> +	return (struct nd_opt_hdr *)((struct nd_msg *)icmph)->opt;
> +}
> +
> +static inline struct nd_opt_hdr *ipvlan_icmp6_rs_opts(struct
> icmp6hdr *icmph)
> +{
> +	return (struct nd_opt_hdr *)((struct rs_msg *)icmph)->opt;
> +}
> +
> +static void ipvlan_proxy_l2_update_icmp6(const struct net_device
> *master,
> +					 struct sk_buff *skb,
> +					 struct nd_opt_hdr *nd_opt,
> +					 u8 opt_type)
> +{
> +	u32 opts_len = skb_tail_pointer(skb) - (u8 *)nd_opt;
> +
> +	while (opts_len) {
> +		u32 opt_len = nd_opt->nd_opt_len << 3;
> +
> +		if (nd_opt->nd_opt_type == opt_type) {
> +			struct ipv6hdr *ip6h = ipv6_hdr(skb);
> +			struct icmp6hdr *icmph = icmp6_hdr(skb);
> +			u32 len = ntohs(ip6h->payload_len);
> +
> +			memcpy(nd_opt + 1, master->dev_addr, master-
> >addr_len);
> +			icmph->icmp6_cksum = 0;
> +			icmph->icmp6_cksum =
> +				csum_ipv6_magic(&ip6h->saddr,
> +						&ip6h->daddr, len,
> +						IPPROTO_ICMPV6,
> +						csum_partial(icmph,
> len, 0));
> +			return;
> +		}
> +
> +		opts_len -= opt_len;
> +		nd_opt = ((void *)nd_opt) + opt_len;
> +	}
> +}
> +
> +static void ipvlan_proxy_l2_outbound(struct sk_buff *skb,
> +				     const struct net_device
> *master)
> +{
> +	/* masquerade the source MAC address for every outgoing
> frame */
> +	memcpy(eth_hdr(skb)->h_source, master->dev_addr, master-
> >addr_len);
> +
> +	/* ARP and some NDISC packets need additional treatment */
> +	if (skb->protocol == htons(ETH_P_IPV6)) {
> +		struct ipv6hdr *ip6h = ipv6_hdr(skb);
> +		struct icmp6hdr *icmph = icmp6_hdr(skb);
> +		struct nd_opt_hdr *nd_opt;
> +		u8 opt_type;
> +
> +		if (likely(ip6h->nexthdr != NEXTHDR_ICMP))
> +			return;
> +
> +		switch (icmph->icmp6_type) {
> +		case NDISC_NEIGHBOUR_SOLICITATION: {
> +			nd_opt = ipvlan_icmp6_nd_opts(icmph);
> +			opt_type = ND_OPT_SOURCE_LL_ADDR;
> +			break;
> +		}
> +		case NDISC_NEIGHBOUR_ADVERTISEMENT: {
> +			nd_opt = ipvlan_icmp6_nd_opts(icmph);
> +			opt_type = ND_OPT_TARGET_LL_ADDR;
> +			break;
> +		}
> +		case NDISC_ROUTER_SOLICITATION: {
> +			nd_opt = ipvlan_icmp6_rs_opts(icmph);
> +			opt_type = ND_OPT_SOURCE_LL_ADDR;
> +			break;
> +		}
> +		default:
> +			return;
> +		}
> +
> +		ipvlan_proxy_l2_update_icmp6(master, skb, nd_opt,
> opt_type);
> +
> +	} else if (unlikely(skb->protocol == htons(ETH_P_ARP))) {
> +		memcpy(arp_hdr(skb) + 1, master->dev_addr, master-
> >addr_len);
> +	}
> +}
> +
>  static void ipvlan_dispatch_multicast(struct ipvl_port *port,
>  				      struct sk_buff *skb, u8
> pkt_type,
>  				      unsigned int mac_hash)
> @@ -258,6 +341,7 @@ static void ipvlan_dispatch_multicast(struct
> ipvl_port *port,
>  		/* If the packet originated here, send it out. */
>  		skb->dev = port->dev;
>  		skb->pkt_type = pkt_type;
> +		ipvlan_proxy_l2_outbound(skb, port->dev);
>  		dev_queue_xmit(skb);
>  	} else {
>  		if (consumed)
> @@ -489,6 +573,7 @@ static int ipvlan_xmit_mode_l3(struct sk_buff
> *skb, struct net_device *dev)
>  static inline int ipvlan_process_l2_outbound(struct sk_buff *skb,
>  					     struct net_device *dev)
>  {
> +	ipvlan_proxy_l2_outbound(skb, dev);
>  	ipvlan_skb_crossing_ns(skb, dev);
>  	return dev_queue_xmit(skb);
>  }
> @@ -499,27 +584,27 @@ static int ipvlan_xmit_mode_l2(struct sk_buff
> *skb, struct net_device *dev)
>  	struct ethhdr *ethh = eth_hdr(skb);
>  	struct ipvl_addr *addr;
>  
> -	if (ether_addr_equal(ethh->h_dest, ethh->h_source)) {
> -		addr = ipvlan_get_slave_addr_dst(skb, ipvlan->port);
> -		if (addr)
> -			return ipvlan_rcv_int_frame(addr, &skb);
> +	if (is_multicast_ether_addr(ethh->h_dest)) {
> +		ipvlan_multicast_enqueue(ipvlan->port, skb, true);
> +		return NET_XMIT_SUCCESS;
> +	}
>  
> +	if (ether_addr_equal(ethh->h_dest, ipvlan->phy_dev-
> >dev_addr)) {
>  		skb = skb_share_check(skb, GFP_ATOMIC);
>  		if (unlikely(!skb))
>  			return NET_XMIT_DROP;
>  
> -		/* Packet definitely does not belong to any of the
> -		 * virtual devices, but the dest is local. So
> forward
> -		 * the skb for the main-dev. At the RX side we just
> return
> -		 * RX_PASS for it to be processed further on the
> stack.
> +		/* Forward the skb for the master device. At the RX
> side we
> +		 * just return RX_HANDLER_PASS for it to be
> processed further
> +		 * on the stack.
>  		 */
>  		return dev_forward_skb(ipvlan->phy_dev, skb);
> -
> -	} else if (is_multicast_ether_addr(ethh->h_dest)) {
> -		ipvlan_multicast_enqueue(ipvlan->port, skb, true);
> -		return NET_XMIT_SUCCESS;
>  	}
>  
> +	addr = ipvlan_get_slave_addr_dst(skb, ipvlan->port);
> +	if (addr)
> +		return ipvlan_rcv_int_frame(addr, &skb);
> +
>  	return ipvlan_process_l2_outbound(skb, ipvlan->phy_dev);
>  }
>  
> @@ -562,6 +647,10 @@ static int ipvlan_rcv_ext_frame(struct ipvl_addr
> *addr, struct sk_buff *skb)
>  	struct net_device *dev = ipvlan->dev;
>  	unsigned int len = skb->len + ETH_HLEN;
>  
> +	/* NOTE: although not necessary restore the actual
> destination
> +	 * address; this is also what traffic sniffers will display.
> +	 */
> +	memcpy(eth_hdr(skb)->h_dest, dev->dev_addr, dev->addr_len);
>  	ipvlan_skb_crossing_ns(skb, dev);
>  	ipvlan_count_rx(ipvlan, len, true, false);
>  
> diff --git a/drivers/net/ipvlan/ipvlan_main.c
> b/drivers/net/ipvlan/ipvlan_main.c
> index b837807..709f27d 100644
> --- a/drivers/net/ipvlan/ipvlan_main.c
> +++ b/drivers/net/ipvlan/ipvlan_main.c
> @@ -378,6 +378,7 @@ static const struct net_device_ops
> ipvlan_netdev_ops = {
>  	.ndo_start_xmit		= ipvlan_start_xmit,
>  	.ndo_fix_features	= ipvlan_fix_features,
>  	.ndo_change_rx_flags	= ipvlan_change_rx_flags,
> +	.ndo_set_mac_address	= eth_mac_addr,
>  	.ndo_set_rx_mode	= ipvlan_set_multicast_mac_filter,
>  	.ndo_get_stats64	= ipvlan_get_stats64,
>  	.ndo_vlan_rx_add_vid	= ipvlan_vlan_rx_add_vid,
> @@ -392,9 +393,10 @@ static int ipvlan_hard_header(struct sk_buff
> *skb, struct net_device *dev,
>  	const struct ipvl_dev *ipvlan = netdev_priv(dev);
>  	struct net_device *phy_dev = ipvlan->phy_dev;
>  
> -	/* TODO Probably use a different field than dev_addr so that
> the
> -	 * mac-address on the virtual device is portable and can be
> carried
> -	 * while the packets use the mac-addr on the physical
> device.
> +	/* This driver uses (almost exclusively) L3 addresses for
> +	 * routing/switching. Use the actual slave's MAC address,
> +	 * but overwrite it later during the packet processing for
> +	 * frames leaving from master
>  	 */
>  	return dev_hard_header(skb, phy_dev, type, daddr,
>  			       saddr ? : dev->dev_addr, len);
> @@ -559,11 +561,8 @@ int ipvlan_link_new(struct net *src_net, struct
> net_device *dev,
>  	/* Increment id-base to the next slot for the future
> assignment */
>  	port->dev_id_start = err + 1;
>  
> -	/* TODO Probably put random address here to be presented to
> the
> -	 * world but keep using the physical-dev address for the
> outgoing
> -	 * packets.
> -	 */
> -	memcpy(dev->dev_addr, phy_dev->dev_addr, ETH_ALEN);
> +	/* TODO: consider storing the original MAC address in dev-
> >perm_addr */
> +	eth_hw_addr_random(dev);
>  
>  	dev->priv_flags |= IFF_IPVLAN_SLAVE;
>  
> @@ -619,7 +618,8 @@ void ipvlan_link_setup(struct net_device *dev)
>  	ether_setup(dev);
>  
>  	dev->priv_flags &= ~(IFF_XMIT_DST_RELEASE |
> IFF_TX_SKB_SHARING);
> -	dev->priv_flags |= IFF_UNICAST_FLT | IFF_NO_QUEUE;
> +	dev->priv_flags |= IFF_UNICAST_FLT | IFF_NO_QUEUE
> +			   | IFF_LIVE_ADDR_CHANGE;
>  	dev->netdev_ops = &ipvlan_netdev_ops;
>  	dev->destructor = free_netdev;
>  	dev->header_ops = &ipvlan_header_ops;

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH net-next 9/9] ipvlan: introduce individual MAC addresses
  2017-04-27 16:20   ` Dan Williams
@ 2017-04-27 16:23     ` Dan Williams
  2017-05-02 15:08       ` Chiappero, Marco
  0 siblings, 1 reply; 27+ messages in thread
From: Dan Williams @ 2017-04-27 16:23 UTC (permalink / raw)
  To: Marco Chiappero, netdev
  Cc: David S . Miller, Jeff Kirsher, Alexander Duyck, Sainath Grandhi,
	Mahesh Bandewar

On Thu, 2017-04-27 at 11:20 -0500, Dan Williams wrote:
> On Thu, 2017-04-27 at 15:51 +0100, Marco Chiappero wrote:
> > Currently all the slave devices belonging to the same port inherit
> > their
> > MAC address from its master device. This patch removes this
> > limitation
> > and allows every slave device to obtain a unique MAC address, by
> > default
> > randomly generated at creation time.
> > 
> > Moreover it is now possible to correctly modify the MAC address at
> > any
> > time, fixing an existing bug as MAC address changes on the master
> > were
> > not reflected on the slaves. It also avoids multiple interfaces
> > sharing
> > the same IPv6 link-local address.
> 
> How is this different than macvlan now?  Why would you use unique
> addressed ipvlan instances instead of macvlan?  Wouldn't the same
> problems around external switches not expecting multiple MACs from
> the
> same switch port apply now to ipvlan?
> 
> The whole point of ipvlan AIUI was to get around macvlan problems
> related to multiple MACs on the same port.

Another issue is the unicast MAC limits on cards.  ipvlan is now much
more likely to hit the unicast MAC limit of the NIC and thus trigger
promiscuous mode and the resulting performance drop, where before it
would not.

Dan

> Also, I think the IPv6 thing you mention is incorrect and has long
> since been solved.  Originally, ipvlan did not include a "dev_id"
> property that differened between child interfaces, and thus the IID
> of
> the each interface was the same.  That has now been fixed, and each
> ipvlan slave should now have a different IID and thus a different
> link-
> local address.
> 
> Dan
> 
> > Since ipvlan is designed to expose a single MAC address for
> > external
> > communications, the driver now behaves as follow:
> > - L2 mode:
> >    * Any reference to the internal MAC address of the originating
> > slave
> >      is replaced with the MAC address of the master for outbound
> > frames.
> >    * Likewise, the destination MAC address is overwritten with the
> >      internal one (once the correct slave is determined) for any
> >      incoming external frame.
> >    * For any internal slave-to-slave communication, the original
> > MAC
> >      addresses are preserved (although not used for
> > routing/switching).
> > - L3/L3s mode:
> >    * The destination MAC address for incoming external packets is
> >      replaced with the one belonging to the destination slave
> > device
> >      (as for L2 mode)
> >    * Every other path behaves as before.
> > 
> > Being a significant behavioral change, version number has been
> > increased.
> > 
> > Signed-off-by: Marco Chiappero <marco.chiappero@intel.com>
> > Tested-by: Marco Chiappero <marco.chiappero@intel.com>
> > ---
> >  drivers/net/ipvlan/ipvlan.h      |   2 +-
> >  drivers/net/ipvlan/ipvlan_core.c | 113
> > ++++++++++++++++++++++++++++++++++-----
> >  drivers/net/ipvlan/ipvlan_main.c |  18 +++----
> >  3 files changed, 111 insertions(+), 22 deletions(-)
> > 
> > diff --git a/drivers/net/ipvlan/ipvlan.h
> > b/drivers/net/ipvlan/ipvlan.h
> > index 800a46c..efe4fd1 100644
> > --- a/drivers/net/ipvlan/ipvlan.h
> > +++ b/drivers/net/ipvlan/ipvlan.h
> > @@ -32,7 +32,7 @@
> >  #include <net/l3mdev.h>
> >  
> >  #define IPVLAN_DRV	"ipvlan"
> > -#define IPV_DRV_VER	"0.1"
> > +#define IPV_DRV_VER	"0.2"
> >  
> >  #define IPVLAN_HASH_SIZE	(1 << BITS_PER_BYTE)
> >  #define IPVLAN_HASH_MASK	(IPVLAN_HASH_SIZE - 1)
> > diff --git a/drivers/net/ipvlan/ipvlan_core.c
> > b/drivers/net/ipvlan/ipvlan_core.c
> > index 67e342d..a30bc11 100644
> > --- a/drivers/net/ipvlan/ipvlan_core.c
> > +++ b/drivers/net/ipvlan/ipvlan_core.c
> > @@ -215,6 +215,89 @@ static void ipvlan_skb_crossing_ns(struct
> > sk_buff *skb, struct net_device *dev)
> >  		skb->dev = dev;
> >  }
> >  
> > +static inline struct nd_opt_hdr *ipvlan_icmp6_nd_opts(struct
> > icmp6hdr *icmph)
> > +{
> > +	return (struct nd_opt_hdr *)((struct nd_msg *)icmph)->opt;
> > +}
> > +
> > +static inline struct nd_opt_hdr *ipvlan_icmp6_rs_opts(struct
> > icmp6hdr *icmph)
> > +{
> > +	return (struct nd_opt_hdr *)((struct rs_msg *)icmph)->opt;
> > +}
> > +
> > +static void ipvlan_proxy_l2_update_icmp6(const struct net_device
> > *master,
> > +					 struct sk_buff *skb,
> > +					 struct nd_opt_hdr
> > *nd_opt,
> > +					 u8 opt_type)
> > +{
> > +	u32 opts_len = skb_tail_pointer(skb) - (u8 *)nd_opt;
> > +
> > +	while (opts_len) {
> > +		u32 opt_len = nd_opt->nd_opt_len << 3;
> > +
> > +		if (nd_opt->nd_opt_type == opt_type) {
> > +			struct ipv6hdr *ip6h = ipv6_hdr(skb);
> > +			struct icmp6hdr *icmph = icmp6_hdr(skb);
> > +			u32 len = ntohs(ip6h->payload_len);
> > +
> > +			memcpy(nd_opt + 1, master->dev_addr,
> > master-
> > > addr_len);
> > 
> > +			icmph->icmp6_cksum = 0;
> > +			icmph->icmp6_cksum =
> > +				csum_ipv6_magic(&ip6h->saddr,
> > +						&ip6h->daddr, len,
> > +						IPPROTO_ICMPV6,
> > +						csum_partial(icmph
> > ,
> > len, 0));
> > +			return;
> > +		}
> > +
> > +		opts_len -= opt_len;
> > +		nd_opt = ((void *)nd_opt) + opt_len;
> > +	}
> > +}
> > +
> > +static void ipvlan_proxy_l2_outbound(struct sk_buff *skb,
> > +				     const struct net_device
> > *master)
> > +{
> > +	/* masquerade the source MAC address for every outgoing
> > frame */
> > +	memcpy(eth_hdr(skb)->h_source, master->dev_addr, master-
> > > addr_len);
> > 
> > +
> > +	/* ARP and some NDISC packets need additional treatment */
> > +	if (skb->protocol == htons(ETH_P_IPV6)) {
> > +		struct ipv6hdr *ip6h = ipv6_hdr(skb);
> > +		struct icmp6hdr *icmph = icmp6_hdr(skb);
> > +		struct nd_opt_hdr *nd_opt;
> > +		u8 opt_type;
> > +
> > +		if (likely(ip6h->nexthdr != NEXTHDR_ICMP))
> > +			return;
> > +
> > +		switch (icmph->icmp6_type) {
> > +		case NDISC_NEIGHBOUR_SOLICITATION: {
> > +			nd_opt = ipvlan_icmp6_nd_opts(icmph);
> > +			opt_type = ND_OPT_SOURCE_LL_ADDR;
> > +			break;
> > +		}
> > +		case NDISC_NEIGHBOUR_ADVERTISEMENT: {
> > +			nd_opt = ipvlan_icmp6_nd_opts(icmph);
> > +			opt_type = ND_OPT_TARGET_LL_ADDR;
> > +			break;
> > +		}
> > +		case NDISC_ROUTER_SOLICITATION: {
> > +			nd_opt = ipvlan_icmp6_rs_opts(icmph);
> > +			opt_type = ND_OPT_SOURCE_LL_ADDR;
> > +			break;
> > +		}
> > +		default:
> > +			return;
> > +		}
> > +
> > +		ipvlan_proxy_l2_update_icmp6(master, skb, nd_opt,
> > opt_type);
> > +
> > +	} else if (unlikely(skb->protocol == htons(ETH_P_ARP))) {
> > +		memcpy(arp_hdr(skb) + 1, master->dev_addr, master-
> > > addr_len);
> > 
> > +	}
> > +}
> > +
> >  static void ipvlan_dispatch_multicast(struct ipvl_port *port,
> >  				      struct sk_buff *skb, u8
> > pkt_type,
> >  				      unsigned int mac_hash)
> > @@ -258,6 +341,7 @@ static void ipvlan_dispatch_multicast(struct
> > ipvl_port *port,
> >  		/* If the packet originated here, send it out. */
> >  		skb->dev = port->dev;
> >  		skb->pkt_type = pkt_type;
> > +		ipvlan_proxy_l2_outbound(skb, port->dev);
> >  		dev_queue_xmit(skb);
> >  	} else {
> >  		if (consumed)
> > @@ -489,6 +573,7 @@ static int ipvlan_xmit_mode_l3(struct sk_buff
> > *skb, struct net_device *dev)
> >  static inline int ipvlan_process_l2_outbound(struct sk_buff *skb,
> >  					     struct net_device
> > *dev)
> >  {
> > +	ipvlan_proxy_l2_outbound(skb, dev);
> >  	ipvlan_skb_crossing_ns(skb, dev);
> >  	return dev_queue_xmit(skb);
> >  }
> > @@ -499,27 +584,27 @@ static int ipvlan_xmit_mode_l2(struct sk_buff
> > *skb, struct net_device *dev)
> >  	struct ethhdr *ethh = eth_hdr(skb);
> >  	struct ipvl_addr *addr;
> >  
> > -	if (ether_addr_equal(ethh->h_dest, ethh->h_source)) {
> > -		addr = ipvlan_get_slave_addr_dst(skb, ipvlan-
> > >port);
> > -		if (addr)
> > -			return ipvlan_rcv_int_frame(addr, &skb);
> > +	if (is_multicast_ether_addr(ethh->h_dest)) {
> > +		ipvlan_multicast_enqueue(ipvlan->port, skb, true);
> > +		return NET_XMIT_SUCCESS;
> > +	}
> >  
> > +	if (ether_addr_equal(ethh->h_dest, ipvlan->phy_dev-
> > > dev_addr)) {
> > 
> >  		skb = skb_share_check(skb, GFP_ATOMIC);
> >  		if (unlikely(!skb))
> >  			return NET_XMIT_DROP;
> >  
> > -		/* Packet definitely does not belong to any of the
> > -		 * virtual devices, but the dest is local. So
> > forward
> > -		 * the skb for the main-dev. At the RX side we
> > just
> > return
> > -		 * RX_PASS for it to be processed further on the
> > stack.
> > +		/* Forward the skb for the master device. At the
> > RX
> > side we
> > +		 * just return RX_HANDLER_PASS for it to be
> > processed further
> > +		 * on the stack.
> >  		 */
> >  		return dev_forward_skb(ipvlan->phy_dev, skb);
> > -
> > -	} else if (is_multicast_ether_addr(ethh->h_dest)) {
> > -		ipvlan_multicast_enqueue(ipvlan->port, skb, true);
> > -		return NET_XMIT_SUCCESS;
> >  	}
> >  
> > +	addr = ipvlan_get_slave_addr_dst(skb, ipvlan->port);
> > +	if (addr)
> > +		return ipvlan_rcv_int_frame(addr, &skb);
> > +
> >  	return ipvlan_process_l2_outbound(skb, ipvlan->phy_dev);
> >  }
> >  
> > @@ -562,6 +647,10 @@ static int ipvlan_rcv_ext_frame(struct
> > ipvl_addr
> > *addr, struct sk_buff *skb)
> >  	struct net_device *dev = ipvlan->dev;
> >  	unsigned int len = skb->len + ETH_HLEN;
> >  
> > +	/* NOTE: although not necessary restore the actual
> > destination
> > +	 * address; this is also what traffic sniffers will
> > display.
> > +	 */
> > +	memcpy(eth_hdr(skb)->h_dest, dev->dev_addr, dev-
> > >addr_len);
> >  	ipvlan_skb_crossing_ns(skb, dev);
> >  	ipvlan_count_rx(ipvlan, len, true, false);
> >  
> > diff --git a/drivers/net/ipvlan/ipvlan_main.c
> > b/drivers/net/ipvlan/ipvlan_main.c
> > index b837807..709f27d 100644
> > --- a/drivers/net/ipvlan/ipvlan_main.c
> > +++ b/drivers/net/ipvlan/ipvlan_main.c
> > @@ -378,6 +378,7 @@ static const struct net_device_ops
> > ipvlan_netdev_ops = {
> >  	.ndo_start_xmit		= ipvlan_start_xmit,
> >  	.ndo_fix_features	= ipvlan_fix_features,
> >  	.ndo_change_rx_flags	= ipvlan_change_rx_flags,
> > +	.ndo_set_mac_address	= eth_mac_addr,
> >  	.ndo_set_rx_mode	= ipvlan_set_multicast_mac_filter,
> >  	.ndo_get_stats64	= ipvlan_get_stats64,
> >  	.ndo_vlan_rx_add_vid	= ipvlan_vlan_rx_add_vid,
> > @@ -392,9 +393,10 @@ static int ipvlan_hard_header(struct sk_buff
> > *skb, struct net_device *dev,
> >  	const struct ipvl_dev *ipvlan = netdev_priv(dev);
> >  	struct net_device *phy_dev = ipvlan->phy_dev;
> >  
> > -	/* TODO Probably use a different field than dev_addr so
> > that
> > the
> > -	 * mac-address on the virtual device is portable and can
> > be
> > carried
> > -	 * while the packets use the mac-addr on the physical
> > device.
> > +	/* This driver uses (almost exclusively) L3 addresses for
> > +	 * routing/switching. Use the actual slave's MAC address,
> > +	 * but overwrite it later during the packet processing for
> > +	 * frames leaving from master
> >  	 */
> >  	return dev_hard_header(skb, phy_dev, type, daddr,
> >  			       saddr ? : dev->dev_addr, len);
> > @@ -559,11 +561,8 @@ int ipvlan_link_new(struct net *src_net,
> > struct
> > net_device *dev,
> >  	/* Increment id-base to the next slot for the future
> > assignment */
> >  	port->dev_id_start = err + 1;
> >  
> > -	/* TODO Probably put random address here to be presented
> > to
> > the
> > -	 * world but keep using the physical-dev address for the
> > outgoing
> > -	 * packets.
> > -	 */
> > -	memcpy(dev->dev_addr, phy_dev->dev_addr, ETH_ALEN);
> > +	/* TODO: consider storing the original MAC address in dev-
> > > perm_addr */
> > 
> > +	eth_hw_addr_random(dev);
> >  
> >  	dev->priv_flags |= IFF_IPVLAN_SLAVE;
> >  
> > @@ -619,7 +618,8 @@ void ipvlan_link_setup(struct net_device *dev)
> >  	ether_setup(dev);
> >  
> >  	dev->priv_flags &= ~(IFF_XMIT_DST_RELEASE |
> > IFF_TX_SKB_SHARING);
> > -	dev->priv_flags |= IFF_UNICAST_FLT | IFF_NO_QUEUE;
> > +	dev->priv_flags |= IFF_UNICAST_FLT | IFF_NO_QUEUE
> > +			   | IFF_LIVE_ADDR_CHANGE;
> >  	dev->netdev_ops = &ipvlan_netdev_ops;
> >  	dev->destructor = free_netdev;
> >  	dev->header_ops = &ipvlan_header_ops;

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH net-next 0/9] support unique MAC addresses for slave devices
  2017-04-27 14:51 [PATCH net-next 0/9] support unique MAC addresses for slave devices Marco Chiappero
                   ` (8 preceding siblings ...)
  2017-04-27 14:51 ` [PATCH net-next 9/9] ipvlan: introduce individual MAC addresses Marco Chiappero
@ 2017-04-27 19:21 ` Mahesh Bandewar (महेश बंडेवार)
  2017-05-02 15:12   ` Chiappero, Marco
  9 siblings, 1 reply; 27+ messages in thread
From: Mahesh Bandewar (महेश बंडेवार) @ 2017-04-27 19:21 UTC (permalink / raw)
  To: Marco Chiappero
  Cc: linux-netdev, David S . Miller, Jeff Kirsher, Alexander Duyck,
	Sainath Grandhi

On Thu, Apr 27, 2017 at 7:51 AM, Marco Chiappero
<marco.chiappero@intel.com> wrote:
> Currently every slave device gets assigned the same MAC address, by
> having it copied from the master interface. Since some code paths
> depend on this identity, changing the MAC address on slave interfaces
> is not supported. However identical MAC addresses can pose problems to
> management and orchestration software that correctly expect network
> interfaces on the same segment to have unique addresses.
>
Please understand that there are two distinct drivers IPvlan and
MACvlan. They both exist together for good reasons and are trying to
cater for different needs. I would love to combine them together if we
don't mess / miss the goodies each of them have to offer... otherwise
*NO*! Having said that if management / orchestration software has
problems then clearly you should not use IPvlan for that use case.

> Patches 1-8 include style fixes and refactoring (patch 9 depends upon)
> that improve the overal quality and make the intruduction of the
> feature straightforward.
>
Lots of this fall into I-say-potato-you-say-... category. My way of
thinking / organizing code is different than yours and you don't have
to like mine and I don't have to like yours.

> Patch 9 enables slave devices to own unique MAC addresses and change
> such addresses live, fixing lack of support and a related bug, as
> MAC address changes on master were not propagated to slave devices.
> In order to preserve the main peculiarity of this driver, that is
> exposing only a single MAC address for outbound traffic, frames
> egressing from master are now effectively masquerated when working in
> L2 mode.
>
This enhancement is, however, coming via packet-header rewrite for
every Tx/Rx packet which defeats the purpose. The only good thing that
came in light is the mac-addr change propagation from master issue;
but if the fix is coming as a side-effect of header rewrite then it's
not an acceptable fix either. This can be simply fixed by changing a
line in ipvlan_hard_header().

> Marco Chiappero (9):
>   ipvlan: fix coding style for the ipvlan tree
>   ipvlan: refactor ipvlan_process_multicast for readability
>   ipvlan: replace ipvlan_rcv_frame
>   ipvlan: rework the IP lookup function
>   ipvlan: improve and uniform naming
>   ipvlan: reposition three functions
>   ipvlan: relocate ipvlan_skb_crossing_ns calls
>   ipvlan: improve compiler hints
>   ipvlan: introduce individual MAC addresses
>
>  drivers/net/ipvlan/ipvlan.h      |   2 +-
>  drivers/net/ipvlan/ipvlan_core.c | 592 ++++++++++++++++++++-------------------
>  drivers/net/ipvlan/ipvlan_main.c |  49 ++--
>  drivers/net/ipvlan/ipvtap.c      |   1 +
>  4 files changed, 333 insertions(+), 311 deletions(-)
>
> --
> 2.9.3
>
> --------------------------------------------------------------
> Intel Research and Development Ireland Limited
> Registered in Ireland
> Registered Office: Collinstown Industrial Park, Leixlip, County Kildare
> Registered Number: 308263
>
>
> This e-mail and any attachments may contain confidential material for the sole
> use of the intended recipient(s). Any review or distribution by others is
> strictly prohibited. If you are not the intended recipient, please contact the
> sender and delete all copies.
>

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH net-next 9/9] ipvlan: introduce individual MAC addresses
  2017-04-27 14:51 ` [PATCH net-next 9/9] ipvlan: introduce individual MAC addresses Marco Chiappero
  2017-04-27 16:20   ` Dan Williams
@ 2017-04-27 20:30   ` kbuild test robot
  1 sibling, 0 replies; 27+ messages in thread
From: kbuild test robot @ 2017-04-27 20:30 UTC (permalink / raw)
  To: Marco Chiappero
  Cc: kbuild-all, netdev, David S . Miller, Jeff Kirsher,
	Alexander Duyck, Sainath Grandhi, Mahesh Bandewar,
	Marco Chiappero

[-- Attachment #1: Type: text/plain, Size: 1561 bytes --]

Hi Marco,

[auto build test ERROR on net/master]
[also build test ERROR on v4.11-rc8]
[cannot apply to net-next/master next-20170427]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:    https://github.com/0day-ci/linux/commits/Marco-Chiappero/support-unique-MAC-addresses-for-slave-devices/20170428-022313
config: tile-allmodconfig (attached as .config)
compiler: tilegx-linux-gcc (GCC) 4.6.2
reproduce:
        wget https://raw.githubusercontent.com/01org/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # save the attached .config to linux build tree
        make.cross ARCH=tile 

All errors (new ones prefixed by >>):

   drivers/net//ipvlan/ipvlan_core.c: In function 'ipvlan_proxy_l2_update_icmp6':
>> drivers/net//ipvlan/ipvlan_core.c:246:5: error: implicit declaration of function 'csum_ipv6_magic'
   cc1: some warnings being treated as errors

vim +/csum_ipv6_magic +246 drivers/net//ipvlan/ipvlan_core.c

   240				struct icmp6hdr *icmph = icmp6_hdr(skb);
   241				u32 len = ntohs(ip6h->payload_len);
   242	
   243				memcpy(nd_opt + 1, master->dev_addr, master->addr_len);
   244				icmph->icmp6_cksum = 0;
   245				icmph->icmp6_cksum =
 > 246					csum_ipv6_magic(&ip6h->saddr,
   247							&ip6h->daddr, len,
   248							IPPROTO_ICMPV6,
   249							csum_partial(icmph, len, 0));

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 48036 bytes --]

^ permalink raw reply	[flat|nested] 27+ messages in thread

* RE: [PATCH net-next 9/9] ipvlan: introduce individual MAC addresses
  2017-04-27 16:23     ` Dan Williams
@ 2017-05-02 15:08       ` Chiappero, Marco
  2017-05-02 16:09         ` Dan Williams
  0 siblings, 1 reply; 27+ messages in thread
From: Chiappero, Marco @ 2017-05-02 15:08 UTC (permalink / raw)
  To: 'Dan Williams', netdev
  Cc: David S . Miller, Kirsher, Jeffrey T, Duyck, Alexander H,
	Grandhi, Sainath, Mahesh Bandewar


> -----Original Message-----
> From: Dan Williams [mailto:dcbw@redhat.com]
> On Thu, 2017-04-27 at 11:20 -0500, Dan Williams wrote:
> > On Thu, 2017-04-27 at 15:51 +0100, Marco Chiappero wrote:
> > > Currently all the slave devices belonging to the same port inherit
> > > their MAC address from its master device. This patch removes this
> > > limitation and allows every slave device to obtain a unique MAC
> > > address, by default randomly generated at creation time.
> > >
> > > Moreover it is now possible to correctly modify the MAC address at
> > > any time, fixing an existing bug as MAC address changes on the
> > > master were not reflected on the slaves. It also avoids multiple
> > > interfaces sharing the same IPv6 link-local address.
> >
> > How is this different than macvlan now?

The same way it was before. The purpose of the patch is to make possible to change the MAC address on slaves, not to change the external behavior of ipvlan: ipvlan will still behave as ipvlan, macvlan will still behave as macvlan.

>>  Why would you use unique
> > addressed ipvlan instances instead of macvlan?  Wouldn't the same
> > problems around external switches not expecting multiple MACs from the
> > same switch port apply now to ipvlan?

No. I'm willing to rework the commit messages if this point is not clear.
The idea is to effectively NAT communications, which is more or less the whole concept behind ipvlan.

> > The whole point of ipvlan AIUI was to get around macvlan problems
> > related to multiple MACs on the same port.
> 
> Another issue is the unicast MAC limits on cards.  ipvlan is now much more likely
> to hit the unicast MAC limit of the NIC and thus trigger promiscuous mode and
> the resulting performance drop, where before it would not.

The outside world will still see and use only one unicast MAC address, belonging to the master interface. No need to store any other unicast MAC in the filter.

Multicast filters will work as before, the only difference is that now slaves have different IPv6 link-local addresses, with an associated L2 multicast address). If this turns out to be an issue it's possible to keep the same MAC address by default but still allow it change. Or maybe force the same default link-local address somehow.
 
However, if the MAC filter is really the issue, I strongly encourage you to start working with HW vendors in order to better support modern data center workloads as a long term solution.

> > Also, I think the IPv6 thing you mention is incorrect and has long
> > since been solved.  Originally, ipvlan did not include a "dev_id"
> > property that differened between child interfaces, and thus the IID of
> > the each interface was the same.  That has now been fixed, and each
> > ipvlan slave should now have a different IID and thus a different
> > link-
> > local address.

Yes, thank you for pointing it out. I started this change a few months ago when such fix not present and simply rebased lately without noticing it.

Please let me know if you have further questions.

Thank you,
Marco
--------------------------------------------------------------
Intel Research and Development Ireland Limited
Registered in Ireland
Registered Office: Collinstown Industrial Park, Leixlip, County Kildare
Registered Number: 308263


This e-mail and any attachments may contain confidential material for the sole
use of the intended recipient(s). Any review or distribution by others is
strictly prohibited. If you are not the intended recipient, please contact the
sender and delete all copies.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* RE: [PATCH net-next 0/9] support unique MAC addresses for slave devices
  2017-04-27 19:21 ` [PATCH net-next 0/9] support unique MAC addresses for slave devices Mahesh Bandewar (महेश बंडेवार)
@ 2017-05-02 15:12   ` Chiappero, Marco
  0 siblings, 0 replies; 27+ messages in thread
From: Chiappero, Marco @ 2017-05-02 15:12 UTC (permalink / raw)
  To: maheshb, linux-netdev
  Cc: David S . Miller, Kirsher, Jeffrey T, Duyck, Alexander H,
	Grandhi, Sainath


> -----Original Message-----
> From: Mahesh Bandewar (महेश बंडेवार)
> 
> On Thu, Apr 27, 2017 at 7:51 AM, Marco Chiappero
> <marco.chiappero@intel.com> wrote:
> > Currently every slave device gets assigned the same MAC address, by
> > having it copied from the master interface. Since some code paths
> > depend on this identity, changing the MAC address on slave interfaces
> > is not supported. However identical MAC addresses can pose problems to
> > management and orchestration software that correctly expect network
> > interfaces on the same segment to have unique addresses.
> >
> Please understand that there are two distinct drivers IPvlan and MACvlan. They
> both exist together for good reasons and are trying to cater for different needs. I
> would love to combine them together if we don't mess / miss the goodies each
> of them have to offer... otherwise *NO*! Having said that if management /
> orchestration software has problems then clearly you should not use IPvlan for
> that use case.

I don't want to start a conversation on whether that software is the problem or not, which is probably arguable anyway, but their combination with ipvlan is a perfectly valid use case, the same way macvlan is.
Generally speaking, it's very reasonable and indeed common to look up interfaces by their MAC address. Being able to change the MAC address is also a reasonable expectation, especially for a soft interface which does not even use unicast L2 addresses for forwarding decisions.

> > Patches 1-8 include style fixes and refactoring (patch 9 depends upon)
> > that improve the overal quality and make the intruduction of the
> > feature straightforward.
> >
> Lots of this fall into I-say-potato-you-say-... category. My way of thinking /
> organizing code is different than yours and you don't have to like mine and I
> don't have to like yours.

A few patches are definitely subject to personal taste, I have no problem dropping them although I still see improvements in readability. Having that said:
- patch 1 is based on checkpatch.pl, so it's substantially objective.
- patches 3 and 4 are well motivated and driven by very specific reasons: both improve readability, remove unnecessary jumps and 4 in particular removes a good amount of duplicated code (52 insertions, 135 deletions).

> > Patch 9 enables slave devices to own unique MAC addresses and change
> > such addresses live, fixing lack of support and a related bug, as MAC
> > address changes on master were not propagated to slave devices.
> > In order to preserve the main peculiarity of this driver, that is
> > exposing only a single MAC address for outbound traffic, frames
> > egressing from master are now effectively masquerated when working in
> > L2 mode.
> >
> This enhancement is, however, coming via packet-header rewrite for every
> Tx/Rx packet which defeats the purpose. 

This only applies to N-S traffic in L2 mode, but nothing prevents you from reversing the logic by modifying ipvlan_hard_header and moving the rewrite to E-W traffic instead. I've made this decision in order to fully emulate an L2 network for slaves and make the master interface appear as a sort of L2 NAT. Alternatively you can limit yourself to the mangling done upfront in ipvlan_hard_header, although when sniffing from a slave things will appear confusing.

Now, if by "purpose" you mean performance implications in L2 mode, I doubt a small memcpy done while handling the skb has a major impact. In fact from the few performance tests I run I could not see any noticeable difference. I'd be happy to investigate this further with additional benchmarks in order to make a more informed decision.

L3 and L3s modes are not affected at all (actually they are on the reception side but this can be modified), so this change can be introduced at no cost here.

> The only good thing that came in light
> is the mac-addr change propagation from master issue; but if the fix is coming as
> a side-effect of header rewrite then it's not an acceptable fix either. 

It's not at all a side-effect of the header rewrite, it's rather the opposite. Either you force MAC addresses to stay in sync or let them vary individually, which is what I'm proposing for the reasons mentioned earlier. How you achieve this is a different story.

Of course, the former case can be fixed by simply doing what macvlan does in passthru mode, that is, sync up the slave(s) upon address change on the master via observer. The latter case can be addressed in one of the above ways. I would like to have such conversation especially in light of the following comments from you:

392         /* TODO Probably use a different field than dev_addr so that the
393          * mac-address on the virtual device is portable and can be carried
394          * while the packets use the mac-addr on the physical device.
395          */

536         /* TODO Probably put random address here to be presented to the
537          * world but keep using the physical-dev address for the outgoing
538          * packets.
539          */

> This can be
> simply fixed by changing a line in ipvlan_hard_header().

This is conceptually the same approach as above, you would still need to mangle ARP and ND frames for N-S traffic to make it work though, which is the biggest part of patch 9. 

Please, review the patch and elaborate specific concerns.

Thank you,
Marco
--------------------------------------------------------------
Intel Research and Development Ireland Limited
Registered in Ireland
Registered Office: Collinstown Industrial Park, Leixlip, County Kildare
Registered Number: 308263


This e-mail and any attachments may contain confidential material for the sole
use of the intended recipient(s). Any review or distribution by others is
strictly prohibited. If you are not the intended recipient, please contact the
sender and delete all copies.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH net-next 9/9] ipvlan: introduce individual MAC addresses
  2017-05-02 15:08       ` Chiappero, Marco
@ 2017-05-02 16:09         ` Dan Williams
  2017-05-04  9:37           ` Chiappero, Marco
  0 siblings, 1 reply; 27+ messages in thread
From: Dan Williams @ 2017-05-02 16:09 UTC (permalink / raw)
  To: Chiappero, Marco, netdev
  Cc: David S . Miller, Kirsher, Jeffrey T, Duyck, Alexander H,
	Grandhi, Sainath, Mahesh Bandewar

On Tue, 2017-05-02 at 15:08 +0000, Chiappero, Marco wrote:
> > -----Original Message-----
> > From: Dan Williams [mailto:dcbw@redhat.com]
> > On Thu, 2017-04-27 at 11:20 -0500, Dan Williams wrote:
> > > On Thu, 2017-04-27 at 15:51 +0100, Marco Chiappero wrote:
> > > > Currently all the slave devices belonging to the same port
> > > > inherit
> > > > their MAC address from its master device. This patch removes
> > > > this
> > > > limitation and allows every slave device to obtain a unique MAC
> > > > address, by default randomly generated at creation time.
> > > > 
> > > > Moreover it is now possible to correctly modify the MAC address
> > > > at
> > > > any time, fixing an existing bug as MAC address changes on the
> > > > master were not reflected on the slaves. It also avoids
> > > > multiple
> > > > interfaces sharing the same IPv6 link-local address.
> > > 
> > > How is this different than macvlan now?
> 
> The same way it was before. The purpose of the patch is to make
> possible to change the MAC address on slaves, not to change the
> external behavior of ipvlan: ipvlan will still behave as ipvlan,
> macvlan will still behave as macvlan.

Ok, it was completely unclear from the commit message that the
"internal" MAC addresses of the ipvlan interfaces were not reflected
"on the wire", but that this was essentially (as you say below) MAC
NAT.

I think everyone agrees that being able to change the MAC is useful and
was a bug.

What I'm still not clear on is, if IPv6 is already solved, why is it
useful to have assign ipvlan interface a unique MAC address?  Is it
only to make interface lookups via MAC easier?

Dan

> > >   Why would you use unique
> > > addressed ipvlan instances instead of macvlan?  Wouldn't the same
> > > problems around external switches not expecting multiple MACs
> > > from the
> > > same switch port apply now to ipvlan?
> 
> No. I'm willing to rework the commit messages if this point is not
> clear.
> The idea is to effectively NAT communications, which is more or less
> the whole concept behind ipvlan.
> 
> > > The whole point of ipvlan AIUI was to get around macvlan problems
> > > related to multiple MACs on the same port.
> > 
> > Another issue is the unicast MAC limits on cards.  ipvlan is now
> > much more likely
> > to hit the unicast MAC limit of the NIC and thus trigger
> > promiscuous mode and
> > the resulting performance drop, where before it would not.
> 
> The outside world will still see and use only one unicast MAC
> address, belonging to the master interface. No need to store any
> other unicast MAC in the filter.
> 
> Multicast filters will work as before, the only difference is that
> now slaves have different IPv6 link-local addresses, with an
> associated L2 multicast address). If this turns out to be an issue
> it's possible to keep the same MAC address by default but still allow
> it change. Or maybe force the same default link-local address
> somehow.
>  
> However, if the MAC filter is really the issue, I strongly encourage
> you to start working with HW vendors in order to better support
> modern data center workloads as a long term solution.
> 
> > > Also, I think the IPv6 thing you mention is incorrect and has
> > > long
> > > since been solved.  Originally, ipvlan did not include a "dev_id"
> > > property that differened between child interfaces, and thus the
> > > IID of
> > > the each interface was the same.  That has now been fixed, and
> > > each
> > > ipvlan slave should now have a different IID and thus a different
> > > link-
> > > local address.
> 
> Yes, thank you for pointing it out. I started this change a few
> months ago when such fix not present and simply rebased lately
> without noticing it.
> 
> Please let me know if you have further questions.
> 
> Thank you,
> Marco
> --------------------------------------------------------------
> Intel Research and Development Ireland Limited
> Registered in Ireland
> Registered Office: Collinstown Industrial Park, Leixlip, County
> Kildare
> Registered Number: 308263
> 
> 
> This e-mail and any attachments may contain confidential material for
> the sole
> use of the intended recipient(s). Any review or distribution by
> others is
> strictly prohibited. If you are not the intended recipient, please
> contact the
> sender and delete all copies.
> 

^ permalink raw reply	[flat|nested] 27+ messages in thread

* RE: [PATCH net-next 9/9] ipvlan: introduce individual MAC addresses
  2017-05-02 16:09         ` Dan Williams
@ 2017-05-04  9:37           ` Chiappero, Marco
  2017-05-04 16:43             ` Jiri Benc
  2017-05-04 22:09             ` Mahesh Bandewar (महेश बंडेवार)
  0 siblings, 2 replies; 27+ messages in thread
From: Chiappero, Marco @ 2017-05-04  9:37 UTC (permalink / raw)
  To: Dan Williams, netdev
  Cc: David S . Miller, Kirsher, Jeffrey T, Duyck, Alexander H,
	Grandhi, Sainath, Mahesh Bandewar

> -----Original Message-----
> From: netdev-owner@vger.kernel.org [mailto:netdev-owner@vger.kernel.org]
> On Behalf Of Dan Williams
> Sent: Tuesday, May 2, 2017 5:09 PM
> To: Chiappero, Marco <marco.chiappero@intel.com>; netdev@vger.kernel.org
> Cc: David S . Miller <davem@davemloft.net>; Kirsher, Jeffrey T
> <jeffrey.t.kirsher@intel.com>; Duyck, Alexander H
> <alexander.h.duyck@intel.com>; Grandhi, Sainath
> <sainath.grandhi@intel.com>; Mahesh Bandewar <maheshb@google.com>
> Subject: Re: [PATCH net-next 9/9] ipvlan: introduce individual MAC addresses
> 
> On Tue, 2017-05-02 at 15:08 +0000, Chiappero, Marco wrote:
> > > -----Original Message-----
> > > From: Dan Williams [mailto:dcbw@redhat.com] On Thu, 2017-04-27 at
> > > 11:20 -0500, Dan Williams wrote:
> > > > On Thu, 2017-04-27 at 15:51 +0100, Marco Chiappero wrote:
> > > > > Currently all the slave devices belonging to the same port
> > > > > inherit their MAC address from its master device. This patch
> > > > > removes this limitation and allows every slave device to obtain
> > > > > a unique MAC address, by default randomly generated at creation
> > > > > time.
> > > > >
> > > > > Moreover it is now possible to correctly modify the MAC address
> > > > > at any time, fixing an existing bug as MAC address changes on
> > > > > the master were not reflected on the slaves. It also avoids
> > > > > multiple interfaces sharing the same IPv6 link-local address.
> > > >
> > > > How is this different than macvlan now?
> >
> > The same way it was before. The purpose of the patch is to make
> > possible to change the MAC address on slaves, not to change the
> > external behavior of ipvlan: ipvlan will still behave as ipvlan,
> > macvlan will still behave as macvlan.
> 
> Ok, it was completely unclear from the commit message that the "internal" MAC
> addresses of the ipvlan interfaces were not reflected "on the wire", but that this
> was essentially (as you say below) MAC NAT.

Sorry about that, I'll fix it in V2.

> I think everyone agrees that being able to change the MAC is useful and was a
> bug.
> 
> What I'm still not clear on is, if IPv6 is already solved, why is it useful to have
> assign ipvlan interface a unique MAC address?  Is it only to make interface
> lookups via MAC easier?

The main motivation is that some higher level management software expect interfaces on the same L2 broadcast domain to obviously have different MAC addresses, either out-of-the-box or via address change - or both.  Instead with ipvlan:
- there is no real/formal segmentation between slaves
- slaves share the same L2 address
This looks conceptually wrong. Yes, ipvlan works at L3 (which is an implementation detail anyway), but slaves are Ethernet interfaces and should behave as much as possible as such regardless, with an individual MAC address assigned.

Additionally there is another related bug as it's currently possible to work around this limitation, although breaking the whole thing, by:
1) changing the MAC address on master from X to Y
2) creating a slave, receiving address Y
3) restoring the original MAC address X on master

So, either we fix this by forcing slaves to stay in sync with master, or correctly support independent MAC addresses, which would be IMO preferable for the above reasons.

Best Regards,
Marco
--------------------------------------------------------------
Intel Research and Development Ireland Limited
Registered in Ireland
Registered Office: Collinstown Industrial Park, Leixlip, County Kildare
Registered Number: 308263


This e-mail and any attachments may contain confidential material for the sole
use of the intended recipient(s). Any review or distribution by others is
strictly prohibited. If you are not the intended recipient, please contact the
sender and delete all copies.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH net-next 9/9] ipvlan: introduce individual MAC addresses
  2017-05-04  9:37           ` Chiappero, Marco
@ 2017-05-04 16:43             ` Jiri Benc
  2017-05-04 22:15               ` Mahesh Bandewar (महेश बंडेवार)
  2017-05-08 15:29               ` Chiappero, Marco
  2017-05-04 22:09             ` Mahesh Bandewar (महेश बंडेवार)
  1 sibling, 2 replies; 27+ messages in thread
From: Jiri Benc @ 2017-05-04 16:43 UTC (permalink / raw)
  To: Chiappero, Marco
  Cc: Dan Williams, netdev, David S . Miller, Kirsher, Jeffrey T,
	Duyck, Alexander H, Grandhi, Sainath, Mahesh Bandewar

On Thu, 4 May 2017 09:37:00 +0000, Chiappero, Marco wrote:
> This looks conceptually wrong. Yes, ipvlan works at L3 (which is an
> implementation detail anyway), but slaves are Ethernet interfaces and
> should behave as much as possible as such regardless, with an
> individual MAC address assigned.

Isn't the proper fix then converting ipvlan interfaces to be L3 only
interfaces? I.e., ARPHRD_NONE? There's not much ipvlan can do with
arbitrary Ethernet frames anyway. Of course, a flag to switch to the
new behavior would be needed in order to preserve backwards
compatibility.

This patchset looks very wrong. For proper support of multiple MAC
addresses, we have macvlan and it's pointless to add that to ipvlan.
And doing some kind of weird MAC NAT in ipvlan just to satisfy broken
tools that can't cope with multiple interfaces with the same MAC address
is wrong, too. Those tools are already broken anyway, there's nothing
preventing anyone to set the same MAC address to multiple interfaces.
I suppose those tools don't work with bonding and bridge, either?

> So, either we fix this by forcing slaves to stay in sync with master,

Yes, that's the correct behavior. Well, at least as correct as one can
get with the ipvlan broken design of pretending that an interface is L2
when in fact, it is not.

 Jiri

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH net-next 9/9] ipvlan: introduce individual MAC addresses
  2017-05-04  9:37           ` Chiappero, Marco
  2017-05-04 16:43             ` Jiri Benc
@ 2017-05-04 22:09             ` Mahesh Bandewar (महेश बंडेवार)
  1 sibling, 0 replies; 27+ messages in thread
From: Mahesh Bandewar (महेश बंडेवार) @ 2017-05-04 22:09 UTC (permalink / raw)
  To: Chiappero, Marco
  Cc: Dan Williams, netdev, David S . Miller, Kirsher, Jeffrey T,
	Duyck, Alexander H, Grandhi, Sainath

On Thu, May 4, 2017 at 2:37 AM, Chiappero, Marco
<marco.chiappero@intel.com> wrote:
>> -----Original Message-----
>> From: netdev-owner@vger.kernel.org [mailto:netdev-owner@vger.kernel.org]
>> On Behalf Of Dan Williams
>> Sent: Tuesday, May 2, 2017 5:09 PM
>> To: Chiappero, Marco <marco.chiappero@intel.com>; netdev@vger.kernel.org
>> Cc: David S . Miller <davem@davemloft.net>; Kirsher, Jeffrey T
>> <jeffrey.t.kirsher@intel.com>; Duyck, Alexander H
>> <alexander.h.duyck@intel.com>; Grandhi, Sainath
>> <sainath.grandhi@intel.com>; Mahesh Bandewar <maheshb@google.com>
>> Subject: Re: [PATCH net-next 9/9] ipvlan: introduce individual MAC addresses
>>
>> On Tue, 2017-05-02 at 15:08 +0000, Chiappero, Marco wrote:
>> > > -----Original Message-----
>> > > From: Dan Williams [mailto:dcbw@redhat.com] On Thu, 2017-04-27 at
>> > > 11:20 -0500, Dan Williams wrote:
>> > > > On Thu, 2017-04-27 at 15:51 +0100, Marco Chiappero wrote:
>> > > > > Currently all the slave devices belonging to the same port
>> > > > > inherit their MAC address from its master device. This patch
>> > > > > removes this limitation and allows every slave device to obtain
>> > > > > a unique MAC address, by default randomly generated at creation
>> > > > > time.
>> > > > >
>> > > > > Moreover it is now possible to correctly modify the MAC address
>> > > > > at any time, fixing an existing bug as MAC address changes on
>> > > > > the master were not reflected on the slaves. It also avoids
>> > > > > multiple interfaces sharing the same IPv6 link-local address.
>> > > >
>> > > > How is this different than macvlan now?
>> >
>> > The same way it was before. The purpose of the patch is to make
>> > possible to change the MAC address on slaves, not to change the
>> > external behavior of ipvlan: ipvlan will still behave as ipvlan,
>> > macvlan will still behave as macvlan.
>>
>> Ok, it was completely unclear from the commit message that the "internal" MAC
>> addresses of the ipvlan interfaces were not reflected "on the wire", but that this
>> was essentially (as you say below) MAC NAT.
>
> Sorry about that, I'll fix it in V2.
>
>> I think everyone agrees that being able to change the MAC is useful and was a
>> bug.
>>
>> What I'm still not clear on is, if IPv6 is already solved, why is it useful to have
>> assign ipvlan interface a unique MAC address?  Is it only to make interface
>> lookups via MAC easier?
>
> The main motivation is that some higher level management software expect interfaces on the same L2 broadcast domain to obviously have different MAC addresses, either out-of-the-box or via address change - or both.  Instead with ipvlan:
> - there is no real/formal segmentation between slaves
segmentation between slaves is at L3. If you want segmentation at L2
then use Macvlan

> - slaves share the same L2 address
Yes, that's by design!

> This looks conceptually wrong.
Why is it hard to view a device that does mux/demux using L3 while
keeping L2 same. Namespaces within physical box have different L3 and
have different routing needs too while all this enclosed inside one
host exposing one external L2. If some mgmt software doesn't like it
then either IPvlan is not the best fit for that solution or that
software needs an update.

>Yes, ipvlan works at L3 (which is an implementation detail anyway), but slaves are Ethernet interfaces and should behave as much as possible as such regardless, with an individual MAC address assigned.
>
> Additionally there is another related bug as it's currently possible to work around this limitation, although breaking the whole thing, by:
> 1) changing the MAC address on master from X to Y
> 2) creating a slave, receiving address Y
> 3) restoring the original MAC address X on master
>
> So, either we fix this by forcing slaves to stay in sync with master,
I have already mentioned that this is the only acceptable fix out of
this patchset but not by way of rewriting headers. The simplest fix is
to use the master->dev_addr in dev_hard_header()

> or correctly support independent MAC addresses, which would be IMO preferable for the above reasons.
>
> Best Regards,
> Marco
> --------------------------------------------------------------
> Intel Research and Development Ireland Limited
> Registered in Ireland
> Registered Office: Collinstown Industrial Park, Leixlip, County Kildare
> Registered Number: 308263
>
>
> This e-mail and any attachments may contain confidential material for the sole
> use of the intended recipient(s). Any review or distribution by others is
> strictly prohibited. If you are not the intended recipient, please contact the
> sender and delete all copies.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH net-next 9/9] ipvlan: introduce individual MAC addresses
  2017-05-04 16:43             ` Jiri Benc
@ 2017-05-04 22:15               ` Mahesh Bandewar (महेश बंडेवार)
  2017-05-08 15:29               ` Chiappero, Marco
  1 sibling, 0 replies; 27+ messages in thread
From: Mahesh Bandewar (महेश बंडेवार) @ 2017-05-04 22:15 UTC (permalink / raw)
  To: Jiri Benc
  Cc: Chiappero, Marco, Dan Williams, netdev, David S . Miller,
	Kirsher, Jeffrey T, Duyck, Alexander H, Grandhi, Sainath

On Thu, May 4, 2017 at 9:43 AM, Jiri Benc <jbenc@redhat.com> wrote:
> On Thu, 4 May 2017 09:37:00 +0000, Chiappero, Marco wrote:
>> This looks conceptually wrong. Yes, ipvlan works at L3 (which is an
>> implementation detail anyway), but slaves are Ethernet interfaces and
>> should behave as much as possible as such regardless, with an
>> individual MAC address assigned.
>
> Isn't the proper fix then converting ipvlan interfaces to be L3 only
> interfaces? I.e., ARPHRD_NONE? There's not much ipvlan can do with
> arbitrary Ethernet frames anyway. Of course, a flag to switch to the
> new behavior would be needed in order to preserve backwards
> compatibility.
>
There is mode = L3/L3s for that.

> This patchset looks very wrong. For proper support of multiple MAC
> addresses, we have macvlan and it's pointless to add that to ipvlan.
> And doing some kind of weird MAC NAT in ipvlan just to satisfy broken
> tools that can't cope with multiple interfaces with the same MAC address
> is wrong, too. Those tools are already broken anyway, there's nothing
> preventing anyone to set the same MAC address to multiple interfaces.
> I suppose those tools don't work with bonding and bridge, either?
>
+1

>> So, either we fix this by forcing slaves to stay in sync with master,
>
> Yes, that's the correct behavior. Well, at least as correct as one can
> get with the ipvlan broken design of pretending that an interface is L2
> when in fact, it is not.
>
conceptually view it as a single link (one L2) but mux/demux @ L3 for
multi-ns world with different routing needs without needing additional
packet processing.

>  Jiri

^ permalink raw reply	[flat|nested] 27+ messages in thread

* RE: [PATCH net-next 9/9] ipvlan: introduce individual MAC addresses
  2017-05-04 16:43             ` Jiri Benc
  2017-05-04 22:15               ` Mahesh Bandewar (महेश बंडेवार)
@ 2017-05-08 15:29               ` Chiappero, Marco
  2017-05-08 18:13                 ` Dan Williams
  1 sibling, 1 reply; 27+ messages in thread
From: Chiappero, Marco @ 2017-05-08 15:29 UTC (permalink / raw)
  To: Jiri Benc
  Cc: Dan Williams, netdev, David S . Miller, Kirsher, Jeffrey T,
	Duyck, Alexander H, Grandhi, Sainath, Mahesh Bandewar


> -----Original Message-----
> From: Jiri Benc [mailto:jbenc@redhat.com]
> Sent: Thursday, May 4, 2017 5:44 PM
> To: Chiappero, Marco <marco.chiappero@intel.com>
> Cc: Dan Williams <dcbw@redhat.com>; netdev@vger.kernel.org; David S .
> Miller <davem@davemloft.net>; Kirsher, Jeffrey T
> <jeffrey.t.kirsher@intel.com>; Duyck, Alexander H
> <alexander.h.duyck@intel.com>; Grandhi, Sainath
> <sainath.grandhi@intel.com>; Mahesh Bandewar <maheshb@google.com>
> Subject: Re: [PATCH net-next 9/9] ipvlan: introduce individual MAC addresses
> 
> On Thu, 4 May 2017 09:37:00 +0000, Chiappero, Marco wrote:
> > This looks conceptually wrong. Yes, ipvlan works at L3 (which is an
> > implementation detail anyway), but slaves are Ethernet interfaces and
> > should behave as much as possible as such regardless, with an
> > individual MAC address assigned.
> 
> Isn't the proper fix then converting ipvlan interfaces to be L3 only interfaces?
> I.e., ARPHRD_NONE? There's not much ipvlan can do with arbitrary Ethernet
> frames anyway. Of course, a flag to switch to the new behavior would be
> needed in order to preserve backwards compatibility.

Yes, L3 only interfaces would be a valid solution and a major differentiation with respect to macvlan. In fact it's been considered but abandoned because:
- it would be a break from the past
- VMs and containers usually expect Ethernet devices

Having that said, I'm ok with this solution if deemed preferable.
 
> This patchset looks very wrong. For proper support of multiple MAC addresses,
> we have macvlan and it's pointless to add that to ipvlan.

Ipvlan will never have proper support for L2, it's a L3 thing and doesn't aim at replacing macvlan. So, the options are:
1) remove it and provide L3 only interfaces - as you are proposing
2) fully emulate it preserving the single unicast MAC rule - my proposal

I'm open to both solutions, to me the important thing is to address the problems with the current implementation, one way or another, making it comply with basic networking concepts and expectations.

> And doing some kind of weird MAC NAT in ipvlan just to satisfy broken tools that
> can't cope with multiple interfaces with the same MAC address is wrong, too.

Ipvlan has always had the MAC issue, regardless, these tools simply make it more apparent. And as I said already, whether they are broken is debatable (yet I have to read a reasonable motivation). At the very least their expectation to have unique addresses on the same broadcast domain is hardly arguable. Should ipvlan considered special? Again, questionable.

> Those tools are already broken anyway, there's nothing preventing anyone to
> set the same MAC address to multiple interfaces.

What are you trying to demonstrate, that you can shoot yourself in the foot? That you can interfere with the controller? That you can intentionally break your network? Yes, of course you can, so what?
As long as network components comply with expected behaviors, including the ability to change the MAC address, every orchestrator will do its job of managing the network and will do it correctly.

> I suppose those tools don't work with bonding and bridge, either?

NaaS software and SDN controllers are built around bridges, a bridge is a well-defined, coherent, fundamental network component. As soon as ipvlan will behave coherently as well, upper layers will handle it properly.
By the way, none of them has the same limitations of ipvlan.
 
> > So, either we fix this by forcing slaves to stay in sync with master,
> 
> Yes, that's the correct behavior. Well, at least as correct as one can get with the
> ipvlan broken design of pretending that an interface is L2 when in fact, it is not.

Eventually we got there:  "ipvlan broken design". Let's fix it then. Would moving to L3 only interfaces be accepted instead?
--------------------------------------------------------------
Intel Research and Development Ireland Limited
Registered in Ireland
Registered Office: Collinstown Industrial Park, Leixlip, County Kildare
Registered Number: 308263


This e-mail and any attachments may contain confidential material for the sole
use of the intended recipient(s). Any review or distribution by others is
strictly prohibited. If you are not the intended recipient, please contact the
sender and delete all copies.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH net-next 9/9] ipvlan: introduce individual MAC addresses
  2017-05-08 15:29               ` Chiappero, Marco
@ 2017-05-08 18:13                 ` Dan Williams
  2017-05-09 18:41                   ` Chiappero, Marco
  0 siblings, 1 reply; 27+ messages in thread
From: Dan Williams @ 2017-05-08 18:13 UTC (permalink / raw)
  To: Chiappero, Marco, Jiri Benc
  Cc: netdev, David S . Miller, Kirsher, Jeffrey T, Duyck, Alexander H,
	Grandhi, Sainath, Mahesh Bandewar

On Mon, 2017-05-08 at 15:29 +0000, Chiappero, Marco wrote:
> > -----Original Message-----
> > From: Jiri Benc [mailto:jbenc@redhat.com]
> > Sent: Thursday, May 4, 2017 5:44 PM
> > To: Chiappero, Marco <marco.chiappero@intel.com>
> > Cc: Dan Williams <dcbw@redhat.com>; netdev@vger.kernel.org; David S
> > .
> > Miller <davem@davemloft.net>; Kirsher, Jeffrey T
> > <jeffrey.t.kirsher@intel.com>; Duyck, Alexander H
> > <alexander.h.duyck@intel.com>; Grandhi, Sainath
> > <sainath.grandhi@intel.com>; Mahesh Bandewar <maheshb@google.com>
> > Subject: Re: [PATCH net-next 9/9] ipvlan: introduce individual MAC
> > addresses
> > 
> > On Thu, 4 May 2017 09:37:00 +0000, Chiappero, Marco wrote:
> > > This looks conceptually wrong. Yes, ipvlan works at L3 (which is
> > > an
> > > implementation detail anyway), but slaves are Ethernet interfaces
> > > and
> > > should behave as much as possible as such regardless, with an
> > > individual MAC address assigned.
> > 
> > Isn't the proper fix then converting ipvlan interfaces to be L3
> > only interfaces?
> > I.e., ARPHRD_NONE? There's not much ipvlan can do with arbitrary
> > Ethernet
> > frames anyway. Of course, a flag to switch to the new behavior
> > would be
> > needed in order to preserve backwards compatibility.
> 
> Yes, L3 only interfaces would be a valid solution and a major
> differentiation with respect to macvlan. In fact it's been considered
> but abandoned because:
> - it would be a break from the past
> - VMs and containers usually expect Ethernet devices
> 
> Having that said, I'm ok with this solution if deemed preferable.
>  
> > This patchset looks very wrong. For proper support of multiple MAC
> > addresses,
> > we have macvlan and it's pointless to add that to ipvlan.
> 
> Ipvlan will never have proper support for L2, it's a L3 thing and
> doesn't aim at replacing macvlan. So, the options are:
> 1) remove it and provide L3 only interfaces - as you are proposing
> 2) fully emulate it preserving the single unicast MAC rule - my
> proposal
> 
> I'm open to both solutions, to me the important thing is to address
> the problems with the current implementation, one way or another,
> making it comply with basic networking concepts and expectations.
> 
> > And doing some kind of weird MAC NAT in ipvlan just to satisfy
> > broken tools that
> > can't cope with multiple interfaces with the same MAC address is
> > wrong, too.
> 
> Ipvlan has always had the MAC issue, regardless, these tools simply
> make it more apparent. And as I said already, whether they are broken

If we're talking about the slaves being unable to change when the
parent MAC changes, everyone agrees that was a bug.

If we are talking about the "all slaves have the same MAC" then that's
not an "issue", that's the way it's designed.  Whether that design is
the best design possible is debatable, but that's the way it currently
is.

> is debatable (yet I have to read a reasonable motivation). At the 

Existing tools are already broken for bond slaves and a couple existing
drivers, see below.

Note that making the MACs unique would break DHCP functionality,
because now the MAC address encoded in the 'chaddr' field of the DHCP
packet would no longer correspond to the MAC address of the device that
DHCP replies should be received on.  The userspace process writes the
'chaddr' field, and the userspace process would see the unique (and
incorrect) MAC address.

Misrepresenting the MAC address of the device, as this would do, is
simply not going to work.  The tools that expect the unique MAC
addresses are already broken.

> very least their expectation to have unique addresses on the same
> broadcast domain is hardly arguable. Should ipvlan considered
> special? Again, questionable.

bond slaves
drivers/net/ethernet/toshiba/ps3_gelic_net.c
drivers/s390/net/qeth_l3_main.c

already all have the same MAC address for different netdevices.  Yeah,
not many people have PS3s anymore, but s390 qeth is fairly widely used.
 Just pointing out that ipvlan is hardly the first device to have
encountered this issue, or to have solved it this way.  qeth does
pretty much the same thing as ipvlan.

I'm not arguing that ipvlan is perfect.  I'm just arguing that in its
current form (eg, virtualized L2 device) making this change is
incorrect.

Dan

> > Those tools are already broken anyway, there's nothing preventing
> > anyone to
> > set the same MAC address to multiple interfaces.
> 
> What are you trying to demonstrate, that you can shoot yourself in
> the foot? That you can interfere with the controller? That you can
> intentionally break your network? Yes, of course you can, so what?
> As long as network components comply with expected behaviors,
> including the ability to change the MAC address, every orchestrator
> will do its job of managing the network and will do it correctly.
> 
> > I suppose those tools don't work with bonding and bridge, either?
> 
> NaaS software and SDN controllers are built around bridges, a bridge
> is a well-defined, coherent, fundamental network component. As soon
> as ipvlan will behave coherently as well, upper layers will handle it
> properly.
> By the way, none of them has the same limitations of ipvlan.
>  
> > > So, either we fix this by forcing slaves to stay in sync with
> > > master,
> > 
> > Yes, that's the correct behavior. Well, at least as correct as one
> > can get with the
> > ipvlan broken design of pretending that an interface is L2 when in
> > fact, it is not.
> 
> Eventually we got there:  "ipvlan broken design". Let's fix it then.
> Would moving to L3 only interfaces be accepted instead?
> --------------------------------------------------------------
> Intel Research and Development Ireland Limited
> Registered in Ireland
> Registered Office: Collinstown Industrial Park, Leixlip, County
> Kildare
> Registered Number: 308263
> 
> 
> This e-mail and any attachments may contain confidential material for
> the sole
> use of the intended recipient(s). Any review or distribution by
> others is
> strictly prohibited. If you are not the intended recipient, please
> contact the
> sender and delete all copies.
> 

^ permalink raw reply	[flat|nested] 27+ messages in thread

* RE: [PATCH net-next 9/9] ipvlan: introduce individual MAC addresses
  2017-05-08 18:13                 ` Dan Williams
@ 2017-05-09 18:41                   ` Chiappero, Marco
  0 siblings, 0 replies; 27+ messages in thread
From: Chiappero, Marco @ 2017-05-09 18:41 UTC (permalink / raw)
  To: Dan Williams, Jiri Benc
  Cc: netdev, David S . Miller, Kirsher, Jeffrey T, Duyck, Alexander H,
	Grandhi, Sainath, Mahesh Bandewar


> -----Original Message-----
> From: Dan Williams [mailto:dcbw@redhat.com]
> Sent: Monday, May 8, 2017 7:13 PM
> To: Chiappero, Marco <marco.chiappero@intel.com>; Jiri Benc
> <jbenc@redhat.com>
> Cc: netdev@vger.kernel.org; David S . Miller <davem@davemloft.net>; Kirsher,
> Jeffrey T <jeffrey.t.kirsher@intel.com>; Duyck, Alexander H
> <alexander.h.duyck@intel.com>; Grandhi, Sainath
> <sainath.grandhi@intel.com>; Mahesh Bandewar <maheshb@google.com>
> Subject: Re: [PATCH net-next 9/9] ipvlan: introduce individual MAC addresses
> 
> On Mon, 2017-05-08 at 15:29 +0000, Chiappero, Marco wrote:
> > > -----Original Message-----
> > > From: Jiri Benc [mailto:jbenc@redhat.com]
> > > Sent: Thursday, May 4, 2017 5:44 PM
> > > To: Chiappero, Marco <marco.chiappero@intel.com>
> > > Cc: Dan Williams <dcbw@redhat.com>; netdev@vger.kernel.org; David S
> > > 
> > > And doing some kind of weird MAC NAT in ipvlan just to satisfy
> > > broken tools that can't cope with multiple interfaces with the same
> > > MAC address is wrong, too.
> >
> > Ipvlan has always had the MAC issue, regardless, these tools simply
> > make it more apparent. And as I said already, whether they are broken
> 
> If we're talking about the slaves being unable to change when the parent MAC
> changes, everyone agrees that was a bug.
> 
> If we are talking about the "all slaves have the same MAC" then that's not an
> "issue", that's the way it's designed.  Whether that design is the best design
> possible is debatable, but that's the way it currently is.

I'm referring to the latter, the former is obvious. That "design" looks like a temporary workaround to me, in fact a couple of TODOs in the code lead to believe it is. They should be removed, at the very least.

> > is debatable (yet I have to read a reasonable motivation). At the
> 
> Existing tools are already broken for bond slaves and a couple existing drivers,
> see below.
> 
> Note that making the MACs unique would break DHCP functionality, because
> now the MAC address encoded in the 'chaddr' field of the DHCP packet would
> no longer correspond to the MAC address of the device that DHCP replies should
> be received on.  The userspace process writes the 'chaddr' field, and the
> userspace process would see the unique (and
> incorrect) MAC address.

Fair point. However, despite not fixing the issues with DHCP, it would be no way worse that it is now (even though I admit I don't like much the workaround). BTW, I don't know about the ISC upstream version, but on Debian/Ubuntu the -B flag is not available, which makes ipvlan+DHCP non-viable on a very large number of deployments.

> > very least their expectation to have unique addresses on the same
> > broadcast domain is hardly arguable. Should ipvlan considered special?
> > Again, questionable.
> 
> bond slaves
> drivers/net/ethernet/toshiba/ps3_gelic_net.c
> drivers/s390/net/qeth_l3_main.c
> 
> already all have the same MAC address for different netdevices.  Yeah, not many
> people have PS3s anymore, but s390 qeth is fairly widely used.
>  Just pointing out that ipvlan is hardly the first device to have encountered this
> issue, or to have solved it this way.  qeth does pretty much the same thing as
> ipvlan.
> 
> I'm not arguing that ipvlan is perfect.  I'm just arguing that in its current form
> (eg, virtualized L2 device) making this change is incorrect.

Don't get me wrong, I understand and appreciate your lengthy reply, even though the fact that a poor solution is already included elsewhere doesn't make it any better.
However, regardless of the uniqueness topic, I don't feel is completely right to change the whole world upstairs making it ipvlan aware either, unless there is a real and coherent differentiation (e.g. L3 only interfaces). I'll drop considering ipvlan as an option for now.


Regards,
Marco

--------------------------------------------------------------
Intel Research and Development Ireland Limited
Registered in Ireland
Registered Office: Collinstown Industrial Park, Leixlip, County Kildare
Registered Number: 308263


This e-mail and any attachments may contain confidential material for the sole
use of the intended recipient(s). Any review or distribution by others is
strictly prohibited. If you are not the intended recipient, please contact the
sender and delete all copies.

^ permalink raw reply	[flat|nested] 27+ messages in thread

end of thread, other threads:[~2017-05-09 18:41 UTC | newest]

Thread overview: 27+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-04-27 14:51 [PATCH net-next 0/9] support unique MAC addresses for slave devices Marco Chiappero
2017-04-27 14:51 ` [PATCH net-next 1/9] ipvlan: fix coding style for the ipvlan tree Marco Chiappero
2017-04-27 14:51 ` [PATCH net-next 2/9] ipvlan: refactor ipvlan_process_multicast for readability Marco Chiappero
2017-04-27 14:51 ` [PATCH net-next 3/9] ipvlan: replace ipvlan_rcv_frame Marco Chiappero
2017-04-27 14:51 ` [PATCH net-next 4/9] ipvlan: rework the IP lookup function Marco Chiappero
2017-04-27 14:51 ` [PATCH net-next 5/9] ipvlan: improve and uniform naming Marco Chiappero
2017-04-27 14:51 ` [PATCH net-next 6/9] ipvlan: reposition three functions Marco Chiappero
2017-04-27 14:51 ` [PATCH net-next 7/9] ipvlan: relocate ipvlan_skb_crossing_ns calls Marco Chiappero
2017-04-27 14:51 ` [PATCH net-next 8/9] ipvlan: improve compiler hints Marco Chiappero
2017-04-27 15:21   ` Duyck, Alexander H
2017-04-27 15:27     ` David Laight
2017-04-27 16:05     ` David Miller
2017-04-27 14:51 ` [PATCH net-next 9/9] ipvlan: introduce individual MAC addresses Marco Chiappero
2017-04-27 16:20   ` Dan Williams
2017-04-27 16:23     ` Dan Williams
2017-05-02 15:08       ` Chiappero, Marco
2017-05-02 16:09         ` Dan Williams
2017-05-04  9:37           ` Chiappero, Marco
2017-05-04 16:43             ` Jiri Benc
2017-05-04 22:15               ` Mahesh Bandewar (महेश बंडेवार)
2017-05-08 15:29               ` Chiappero, Marco
2017-05-08 18:13                 ` Dan Williams
2017-05-09 18:41                   ` Chiappero, Marco
2017-05-04 22:09             ` Mahesh Bandewar (महेश बंडेवार)
2017-04-27 20:30   ` kbuild test robot
2017-04-27 19:21 ` [PATCH net-next 0/9] support unique MAC addresses for slave devices Mahesh Bandewar (महेश बंडेवार)
2017-05-02 15:12   ` Chiappero, Marco

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.