linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v5] net: bonding: Add support for IPV6 ns/na to balance-alb/balance-tlb mode
@ 2021-12-24 14:25 Sun Shouxin
  2021-12-24 22:57 ` Jakub Kicinski
                   ` (2 more replies)
  0 siblings, 3 replies; 7+ messages in thread
From: Sun Shouxin @ 2021-12-24 14:25 UTC (permalink / raw)
  To: j.vosburgh, vfalico, andy, davem, kuba; +Cc: netdev, linux-kernel, huyd12

Since ipv6 neighbor solicitation and advertisement messages
isn't handled gracefully in bonding6 driver, we can see packet
drop due to inconsistency bewteen mac address in the option
message and source MAC .

Another examples is ipv6 neighbor solicitation and advertisement
messages from VM via tap attached to host brighe, the src mac
mighe be changed through balance-alb mode, but it is not synced
with Link-layer address in the option message.

The patch implements bond6's tx handle for ipv6 neighbor
solicitation and advertisement messages.

Suggested-by: Hu Yadi <huyd12@chinatelecom.cn>
Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Sun Shouxin <sunshouxin@chinatelecom.cn>
---
 drivers/net/bonding/bond_alb.c | 149 +++++++++++++++++++++++++++++++++
 1 file changed, 149 insertions(+)

diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c
index 533e476988f2..485e4863a365 100644
--- a/drivers/net/bonding/bond_alb.c
+++ b/drivers/net/bonding/bond_alb.c
@@ -22,6 +22,8 @@
 #include <asm/byteorder.h>
 #include <net/bonding.h>
 #include <net/bond_alb.h>
+#include <net/ndisc.h>
+#include <net/ip6_checksum.h>
 
 static const u8 mac_v6_allmcast[ETH_ALEN + 2] __long_aligned = {
 	0x33, 0x33, 0x00, 0x00, 0x00, 0x01
@@ -1269,6 +1271,137 @@ static int alb_set_mac_address(struct bonding *bond, void *addr)
 	return res;
 }
 
+/*determine if the packet is NA or NS*/
+static bool __alb_determine_nd(struct icmp6hdr *hdr)
+{
+	if (hdr->icmp6_type == NDISC_NEIGHBOUR_ADVERTISEMENT ||
+	    hdr->icmp6_type == NDISC_NEIGHBOUR_SOLICITATION) {
+		return true;
+	}
+
+	return false;
+}
+
+static void alb_change_nd_option(struct sk_buff *skb, void *data)
+{
+	struct nd_msg *msg = (struct nd_msg *)skb_transport_header(skb);
+	struct nd_opt_hdr *nd_opt = (struct nd_opt_hdr *)msg->opt;
+	struct net_device *dev = skb->dev;
+	struct icmp6hdr *icmp6h = icmp6_hdr(skb);
+	struct ipv6hdr *ip6hdr = ipv6_hdr(skb);
+	u8 *lladdr = NULL;
+	u32 ndoptlen = skb_tail_pointer(skb) - (skb_transport_header(skb) +
+				offsetof(struct nd_msg, opt));
+
+	while (ndoptlen) {
+		int l;
+
+		switch (nd_opt->nd_opt_type) {
+		case ND_OPT_SOURCE_LL_ADDR:
+		case ND_OPT_TARGET_LL_ADDR:
+			lladdr = ndisc_opt_addr_data(nd_opt, dev);
+			break;
+
+		default:
+			lladdr = NULL;
+			break;
+		}
+
+		l = nd_opt->nd_opt_len << 3;
+
+		if (ndoptlen < l || l == 0)
+			return;
+
+		if (lladdr) {
+			memcpy(lladdr, data, dev->addr_len);
+			icmp6h->icmp6_cksum = 0;
+
+			icmp6h->icmp6_cksum = csum_ipv6_magic(&ip6hdr->saddr,
+							      &ip6hdr->daddr,
+						ntohs(ip6hdr->payload_len),
+						IPPROTO_ICMPV6,
+						csum_partial(icmp6h,
+							     ntohs(ip6hdr->payload_len), 0));
+			return;
+		}
+		ndoptlen -= l;
+		nd_opt = ((void *)nd_opt) + l;
+	}
+}
+
+static u8 *alb_get_lladdr(struct sk_buff *skb)
+{
+	struct nd_msg *msg = (struct nd_msg *)skb_transport_header(skb);
+	struct nd_opt_hdr *nd_opt = (struct nd_opt_hdr *)msg->opt;
+	struct net_device *dev = skb->dev;
+	u8 *lladdr = NULL;
+	u32 ndoptlen = skb_tail_pointer(skb) - (skb_transport_header(skb) +
+				offsetof(struct nd_msg, opt));
+
+	while (ndoptlen) {
+		int l;
+
+		switch (nd_opt->nd_opt_type) {
+		case ND_OPT_SOURCE_LL_ADDR:
+		case ND_OPT_TARGET_LL_ADDR:
+			lladdr = ndisc_opt_addr_data(nd_opt, dev);
+			break;
+
+		default:
+			break;
+		}
+
+		l = nd_opt->nd_opt_len << 3;
+
+		if (ndoptlen < l || l == 0)
+			return NULL;
+
+		if (lladdr)
+			return lladdr;
+
+		ndoptlen -= l;
+		nd_opt = ((void *)nd_opt) + l;
+	}
+
+	return lladdr;
+}
+
+static void alb_set_nd_option(struct sk_buff *skb, struct bonding *bond,
+			      struct slave *tx_slave)
+{
+	struct ipv6hdr *ip6hdr;
+	struct icmp6hdr *hdr;
+
+	if (skb->protocol == htons(ETH_P_IPV6)) {
+		if (tx_slave && tx_slave !=
+		    rcu_access_pointer(bond->curr_active_slave)) {
+			ip6hdr = ipv6_hdr(skb);
+			if (ip6hdr->nexthdr == IPPROTO_ICMPV6) {
+				hdr = icmp6_hdr(skb);
+				if (__alb_determine_nd(hdr))
+					alb_change_nd_option(skb, tx_slave->dev->dev_addr);
+			}
+		}
+	}
+}
+
+static bool alb_determine_nd(struct sk_buff *skb, struct bonding *bond)
+{
+	struct ipv6hdr *ip6hdr;
+	struct icmp6hdr *hdr;
+
+	if (skb->protocol == htons(ETH_P_IPV6)) {
+		ip6hdr = ipv6_hdr(skb);
+		if (ip6hdr->nexthdr == IPPROTO_ICMPV6) {
+			hdr = icmp6_hdr(skb);
+			if (__alb_determine_nd(hdr))
+				return true;
+		}
+	}
+
+	return false;
+}
+
 /************************ exported alb functions ************************/
 
 int bond_alb_initialize(struct bonding *bond, int rlb_enabled)
@@ -1350,6 +1483,9 @@ struct slave *bond_xmit_tlb_slave_get(struct bonding *bond,
 		switch (skb->protocol) {
 		case htons(ETH_P_IP):
 		case htons(ETH_P_IPV6):
+			if (alb_determine_nd(skb, bond))
+				break;
+
 			hash_index = bond_xmit_hash(bond, skb);
 			if (bond->params.tlb_dynamic_lb) {
 				tx_slave = tlb_choose_channel(bond,
@@ -1446,6 +1582,18 @@ struct slave *bond_xmit_alb_slave_get(struct bonding *bond,
 			break;
 		}
 
+		if (alb_determine_nd(skb, bond)) {
+			u8 *lladdr;
+
+			lladdr = alb_get_lladdr(skb);
+			if (lladdr) {
+				if (!bond_slave_has_mac_rx(bond, lladdr)) {
+					do_tx_balance = false;
+					break;
+				}
+			}
+		}
+
 		hash_start = (char *)&ip6hdr->daddr;
 		hash_size = sizeof(ip6hdr->daddr);
 		break;
@@ -1489,6 +1637,7 @@ netdev_tx_t bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev)
 	struct slave *tx_slave = NULL;
 
 	tx_slave = bond_xmit_alb_slave_get(bond, skb);
+	alb_set_nd_option(skb, bond, tx_slave);
 	return bond_do_alb_xmit(skb, bond, tx_slave);
 }
 

base-commit: 7a29b11da9651ef6a970e2f6bfd276f053aeb06a
-- 
2.27.0


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH v5] net: bonding: Add support for IPV6 ns/na to balance-alb/balance-tlb mode
  2021-12-24 14:25 [PATCH v5] net: bonding: Add support for IPV6 ns/na to balance-alb/balance-tlb mode Sun Shouxin
@ 2021-12-24 22:57 ` Jakub Kicinski
  2021-12-27 10:06 ` 回复: " huyd12
  2021-12-27 20:36 ` Jay Vosburgh
  2 siblings, 0 replies; 7+ messages in thread
From: Jakub Kicinski @ 2021-12-24 22:57 UTC (permalink / raw)
  To: Sun Shouxin
  Cc: j.vosburgh, vfalico, andy, davem, netdev, linux-kernel, huyd12

On Fri, 24 Dec 2021 09:25:12 -0500 Sun Shouxin wrote:
> Since ipv6 neighbor solicitation and advertisement messages
> isn't handled gracefully in bonding6 driver, we can see packet
> drop due to inconsistency bewteen mac address in the option
> message and source MAC .
> 
> Another examples is ipv6 neighbor solicitation and advertisement
> messages from VM via tap attached to host brighe, the src mac
> mighe be changed through balance-alb mode, but it is not synced
> with Link-layer address in the option message.
> 
> The patch implements bond6's tx handle for ipv6 neighbor
> solicitation and advertisement messages.
> 
> Suggested-by: Hu Yadi <huyd12@chinatelecom.cn>
> Reported-by: kernel test robot <lkp@intel.com>

Still breaks allmodconfig build.

^ permalink raw reply	[flat|nested] 7+ messages in thread

* 回复: [PATCH v5] net: bonding: Add support for IPV6 ns/na to balance-alb/balance-tlb mode
  2021-12-24 14:25 [PATCH v5] net: bonding: Add support for IPV6 ns/na to balance-alb/balance-tlb mode Sun Shouxin
  2021-12-24 22:57 ` Jakub Kicinski
@ 2021-12-27 10:06 ` huyd12
  2021-12-27 20:36 ` Jay Vosburgh
  2 siblings, 0 replies; 7+ messages in thread
From: huyd12 @ 2021-12-27 10:06 UTC (permalink / raw)
  To: sunshouxin, j.vosburgh, vfalico, andy, davem, kuba; +Cc: netdev, linux-kernel


Hi,

I noticed one warning info on spinics, it said the patch "Still breaks
allmodconfig build".
I wonder why I didn't get it ,but I still rebuild it with allmodconfig, no
any warning jump out.
Anyone can light shouxin a bit?

Thanks.


------
胡亚弟
 
中国电信股份有限公司云计算分公司 研发一部
Mobile: 15810133835
北京市海淀中街15号远中悦莱
Email: huyd12@chinatelecom.cn
------------------------------------------------------------------------

-----邮件原件-----
发件人: sunshouxin@chinatelecom.cn <sunshouxin@chinatelecom.cn> 
发送时间: 2021年12月24日 22:25
收件人: j.vosburgh@gmail.com; vfalico@gmail.com; andy@greyhouse.net;
davem@davemloft.net; kuba@kernel.org
抄送: netdev@vger.kernel.org; linux-kernel@vger.kernel.org;
huyd12@chinatelecom.cn
主题: [PATCH v5] net: bonding: Add support for IPV6 ns/na to
balance-alb/balance-tlb mode

Since ipv6 neighbor solicitation and advertisement messages isn't handled
gracefully in bonding6 driver, we can see packet drop due to inconsistency
bewteen mac address in the option message and source MAC .

Another examples is ipv6 neighbor solicitation and advertisement messages
from VM via tap attached to host brighe, the src mac mighe be changed
through balance-alb mode, but it is not synced with Link-layer address in
the option message.

The patch implements bond6's tx handle for ipv6 neighbor solicitation and
advertisement messages.

Suggested-by: Hu Yadi <huyd12@chinatelecom.cn>
Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Sun Shouxin <sunshouxin@chinatelecom.cn>
---
 drivers/net/bonding/bond_alb.c | 149 +++++++++++++++++++++++++++++++++
 1 file changed, 149 insertions(+)

diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c
index 533e476988f2..485e4863a365 100644
--- a/drivers/net/bonding/bond_alb.c
+++ b/drivers/net/bonding/bond_alb.c
@@ -22,6 +22,8 @@
 #include <asm/byteorder.h>
 #include <net/bonding.h>
 #include <net/bond_alb.h>
+#include <net/ndisc.h>
+#include <net/ip6_checksum.h>
 
 static const u8 mac_v6_allmcast[ETH_ALEN + 2] __long_aligned = {
 	0x33, 0x33, 0x00, 0x00, 0x00, 0x01
@@ -1269,6 +1271,137 @@ static int alb_set_mac_address(struct bonding *bond,
void *addr)
 	return res;
 }
 
+/*determine if the packet is NA or NS*/ static bool 
+__alb_determine_nd(struct icmp6hdr *hdr) {
+	if (hdr->icmp6_type == NDISC_NEIGHBOUR_ADVERTISEMENT ||
+	    hdr->icmp6_type == NDISC_NEIGHBOUR_SOLICITATION) {
+		return true;
+	}
+
+	return false;
+}
+
+static void alb_change_nd_option(struct sk_buff *skb, void *data) {
+	struct nd_msg *msg = (struct nd_msg *)skb_transport_header(skb);
+	struct nd_opt_hdr *nd_opt = (struct nd_opt_hdr *)msg->opt;
+	struct net_device *dev = skb->dev;
+	struct icmp6hdr *icmp6h = icmp6_hdr(skb);
+	struct ipv6hdr *ip6hdr = ipv6_hdr(skb);
+	u8 *lladdr = NULL;
+	u32 ndoptlen = skb_tail_pointer(skb) - (skb_transport_header(skb) +
+				offsetof(struct nd_msg, opt));
+
+	while (ndoptlen) {
+		int l;
+
+		switch (nd_opt->nd_opt_type) {
+		case ND_OPT_SOURCE_LL_ADDR:
+		case ND_OPT_TARGET_LL_ADDR:
+			lladdr = ndisc_opt_addr_data(nd_opt, dev);
+			break;
+
+		default:
+			lladdr = NULL;
+			break;
+		}
+
+		l = nd_opt->nd_opt_len << 3;
+
+		if (ndoptlen < l || l == 0)
+			return;
+
+		if (lladdr) {
+			memcpy(lladdr, data, dev->addr_len);
+			icmp6h->icmp6_cksum = 0;
+
+			icmp6h->icmp6_cksum =
csum_ipv6_magic(&ip6hdr->saddr,
+							
&ip6hdr->daddr,
+						ntohs(ip6hdr->payload_len),
+						IPPROTO_ICMPV6,
+						csum_partial(icmp6h,
+							
ntohs(ip6hdr->payload_len), 0));
+			return;
+		}
+		ndoptlen -= l;
+		nd_opt = ((void *)nd_opt) + l;
+	}
+}
+
+static u8 *alb_get_lladdr(struct sk_buff *skb) {
+	struct nd_msg *msg = (struct nd_msg *)skb_transport_header(skb);
+	struct nd_opt_hdr *nd_opt = (struct nd_opt_hdr *)msg->opt;
+	struct net_device *dev = skb->dev;
+	u8 *lladdr = NULL;
+	u32 ndoptlen = skb_tail_pointer(skb) - (skb_transport_header(skb) +
+				offsetof(struct nd_msg, opt));
+
+	while (ndoptlen) {
+		int l;
+
+		switch (nd_opt->nd_opt_type) {
+		case ND_OPT_SOURCE_LL_ADDR:
+		case ND_OPT_TARGET_LL_ADDR:
+			lladdr = ndisc_opt_addr_data(nd_opt, dev);
+			break;
+
+		default:
+			break;
+		}
+
+		l = nd_opt->nd_opt_len << 3;
+
+		if (ndoptlen < l || l == 0)
+			return NULL;
+
+		if (lladdr)
+			return lladdr;
+
+		ndoptlen -= l;
+		nd_opt = ((void *)nd_opt) + l;
+	}
+
+	return lladdr;
+}
+
+static void alb_set_nd_option(struct sk_buff *skb, struct bonding *bond,
+			      struct slave *tx_slave)
+{
+	struct ipv6hdr *ip6hdr;
+	struct icmp6hdr *hdr;
+
+	if (skb->protocol == htons(ETH_P_IPV6)) {
+		if (tx_slave && tx_slave !=
+		    rcu_access_pointer(bond->curr_active_slave)) {
+			ip6hdr = ipv6_hdr(skb);
+			if (ip6hdr->nexthdr == IPPROTO_ICMPV6) {
+				hdr = icmp6_hdr(skb);
+				if (__alb_determine_nd(hdr))
+					alb_change_nd_option(skb,
tx_slave->dev->dev_addr);
+			}
+		}
+	}
+}
+
+static bool alb_determine_nd(struct sk_buff *skb, struct bonding *bond) 
+{
+	struct ipv6hdr *ip6hdr;
+	struct icmp6hdr *hdr;
+
+	if (skb->protocol == htons(ETH_P_IPV6)) {
+		ip6hdr = ipv6_hdr(skb);
+		if (ip6hdr->nexthdr == IPPROTO_ICMPV6) {
+			hdr = icmp6_hdr(skb);
+			if (__alb_determine_nd(hdr))
+				return true;
+		}
+	}
+
+	return false;
+}
+
 /************************ exported alb functions ************************/
 
 int bond_alb_initialize(struct bonding *bond, int rlb_enabled) @@ -1350,6
+1483,9 @@ struct slave *bond_xmit_tlb_slave_get(struct bonding *bond,
 		switch (skb->protocol) {
 		case htons(ETH_P_IP):
 		case htons(ETH_P_IPV6):
+			if (alb_determine_nd(skb, bond))
+				break;
+
 			hash_index = bond_xmit_hash(bond, skb);
 			if (bond->params.tlb_dynamic_lb) {
 				tx_slave = tlb_choose_channel(bond, @@
-1446,6 +1582,18 @@ struct slave *bond_xmit_alb_slave_get(struct bonding
*bond,
 			break;
 		}
 
+		if (alb_determine_nd(skb, bond)) {
+			u8 *lladdr;
+
+			lladdr = alb_get_lladdr(skb);
+			if (lladdr) {
+				if (!bond_slave_has_mac_rx(bond, lladdr)) {
+					do_tx_balance = false;
+					break;
+				}
+			}
+		}
+
 		hash_start = (char *)&ip6hdr->daddr;
 		hash_size = sizeof(ip6hdr->daddr);
 		break;
@@ -1489,6 +1637,7 @@ netdev_tx_t bond_alb_xmit(struct sk_buff *skb, struct
net_device *bond_dev)
 	struct slave *tx_slave = NULL;
 
 	tx_slave = bond_xmit_alb_slave_get(bond, skb);
+	alb_set_nd_option(skb, bond, tx_slave);
 	return bond_do_alb_xmit(skb, bond, tx_slave);  }
 

base-commit: 7a29b11da9651ef6a970e2f6bfd276f053aeb06a
--
2.27.0



^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH v5] net: bonding: Add support for IPV6 ns/na to balance-alb/balance-tlb mode
  2021-12-24 14:25 [PATCH v5] net: bonding: Add support for IPV6 ns/na to balance-alb/balance-tlb mode Sun Shouxin
  2021-12-24 22:57 ` Jakub Kicinski
  2021-12-27 10:06 ` 回复: " huyd12
@ 2021-12-27 20:36 ` Jay Vosburgh
  2021-12-28  3:01   ` 孙守鑫
  2 siblings, 1 reply; 7+ messages in thread
From: Jay Vosburgh @ 2021-12-27 20:36 UTC (permalink / raw)
  To: Sun Shouxin; +Cc: vfalico, andy, davem, kuba, netdev, linux-kernel, huyd12

Sun Shouxin <sunshouxin@chinatelecom.cn> wrote:

>Since ipv6 neighbor solicitation and advertisement messages
>isn't handled gracefully in bonding6 driver, we can see packet
>drop due to inconsistency bewteen mac address in the option
>message and source MAC .
>
>Another examples is ipv6 neighbor solicitation and advertisement
>messages from VM via tap attached to host brighe, the src mac
>mighe be changed through balance-alb mode, but it is not synced
>with Link-layer address in the option message.
>
>The patch implements bond6's tx handle for ipv6 neighbor
>solicitation and advertisement messages.

	I'm not sure what you've changed here for v5 as there's no
changelog, but I believe the observed problems to be a transmit side
effect (i.e., it is induced by the balance-tlb mode balancing of
outgoing traffic).  As such, the tlb side will rebalance all of the
traffic every ten seconds, so any MAC ND_OPT_*_LL_ADDR option
assignments in the outgoing NS/NA datagrams will only be valid for that
length of time, correct?

	The topology diagram and example that you've removed from the
commit log with v5 said, in part, that the issue arose because the
LL_ADDR option MAC didn't match the actual source MAC.  Since tlb mode
can reshuffle the flows every ten seconds, how did the proposed solution
work reliably?

	In any event, my real question is whether simply disabling tlb
balancing for NS/NA datagrams will resolve the observed issues (i.e.,
have bond_xmit_tlb_slave_get return NULL for IPv6 NS/NA datagrams).
Doing so will cause all NS/NA traffic to egress through the active
interface.  There's already a test in your logic to check for the
tx_slave != bond->curr_active_slave, so presumably everything works
correctly if the NS/NA goes out on the curr_active_slave.  If the "edit
NS/NA datagrams" solution works even in the face of rebalance of
traffic, then would simply assigning all NS/NA traffic to the
curr_active_slave eliminate the problem?

	-J

>Suggested-by: Hu Yadi <huyd12@chinatelecom.cn>
>Reported-by: kernel test robot <lkp@intel.com>
>Signed-off-by: Sun Shouxin <sunshouxin@chinatelecom.cn>
>---
> drivers/net/bonding/bond_alb.c | 149 +++++++++++++++++++++++++++++++++
> 1 file changed, 149 insertions(+)
>
>diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c
>index 533e476988f2..485e4863a365 100644
>--- a/drivers/net/bonding/bond_alb.c
>+++ b/drivers/net/bonding/bond_alb.c
>@@ -22,6 +22,8 @@
> #include <asm/byteorder.h>
> #include <net/bonding.h>
> #include <net/bond_alb.h>
>+#include <net/ndisc.h>
>+#include <net/ip6_checksum.h>
> 
> static const u8 mac_v6_allmcast[ETH_ALEN + 2] __long_aligned = {
> 	0x33, 0x33, 0x00, 0x00, 0x00, 0x01
>@@ -1269,6 +1271,137 @@ static int alb_set_mac_address(struct bonding *bond, void *addr)
> 	return res;
> }
> 
>+/*determine if the packet is NA or NS*/
>+static bool __alb_determine_nd(struct icmp6hdr *hdr)
>+{
>+	if (hdr->icmp6_type == NDISC_NEIGHBOUR_ADVERTISEMENT ||
>+	    hdr->icmp6_type == NDISC_NEIGHBOUR_SOLICITATION) {
>+		return true;
>+	}
>+
>+	return false;
>+}
>+
>+static void alb_change_nd_option(struct sk_buff *skb, void *data)
>+{
>+	struct nd_msg *msg = (struct nd_msg *)skb_transport_header(skb);
>+	struct nd_opt_hdr *nd_opt = (struct nd_opt_hdr *)msg->opt;
>+	struct net_device *dev = skb->dev;
>+	struct icmp6hdr *icmp6h = icmp6_hdr(skb);
>+	struct ipv6hdr *ip6hdr = ipv6_hdr(skb);
>+	u8 *lladdr = NULL;
>+	u32 ndoptlen = skb_tail_pointer(skb) - (skb_transport_header(skb) +
>+				offsetof(struct nd_msg, opt));
>+
>+	while (ndoptlen) {
>+		int l;
>+
>+		switch (nd_opt->nd_opt_type) {
>+		case ND_OPT_SOURCE_LL_ADDR:
>+		case ND_OPT_TARGET_LL_ADDR:
>+			lladdr = ndisc_opt_addr_data(nd_opt, dev);
>+			break;
>+
>+		default:
>+			lladdr = NULL;
>+			break;
>+		}
>+
>+		l = nd_opt->nd_opt_len << 3;
>+
>+		if (ndoptlen < l || l == 0)
>+			return;
>+
>+		if (lladdr) {
>+			memcpy(lladdr, data, dev->addr_len);
>+			icmp6h->icmp6_cksum = 0;
>+
>+			icmp6h->icmp6_cksum = csum_ipv6_magic(&ip6hdr->saddr,
>+							      &ip6hdr->daddr,
>+						ntohs(ip6hdr->payload_len),
>+						IPPROTO_ICMPV6,
>+						csum_partial(icmp6h,
>+							     ntohs(ip6hdr->payload_len), 0));
>+			return;
>+		}
>+		ndoptlen -= l;
>+		nd_opt = ((void *)nd_opt) + l;
>+	}
>+}
>+
>+static u8 *alb_get_lladdr(struct sk_buff *skb)
>+{
>+	struct nd_msg *msg = (struct nd_msg *)skb_transport_header(skb);
>+	struct nd_opt_hdr *nd_opt = (struct nd_opt_hdr *)msg->opt;
>+	struct net_device *dev = skb->dev;
>+	u8 *lladdr = NULL;
>+	u32 ndoptlen = skb_tail_pointer(skb) - (skb_transport_header(skb) +
>+				offsetof(struct nd_msg, opt));
>+
>+	while (ndoptlen) {
>+		int l;
>+
>+		switch (nd_opt->nd_opt_type) {
>+		case ND_OPT_SOURCE_LL_ADDR:
>+		case ND_OPT_TARGET_LL_ADDR:
>+			lladdr = ndisc_opt_addr_data(nd_opt, dev);
>+			break;
>+
>+		default:
>+			break;
>+		}
>+
>+		l = nd_opt->nd_opt_len << 3;
>+
>+		if (ndoptlen < l || l == 0)
>+			return NULL;
>+
>+		if (lladdr)
>+			return lladdr;
>+
>+		ndoptlen -= l;
>+		nd_opt = ((void *)nd_opt) + l;
>+	}
>+
>+	return lladdr;
>+}
>+
>+static void alb_set_nd_option(struct sk_buff *skb, struct bonding *bond,
>+			      struct slave *tx_slave)
>+{
>+	struct ipv6hdr *ip6hdr;
>+	struct icmp6hdr *hdr;
>+
>+	if (skb->protocol == htons(ETH_P_IPV6)) {
>+		if (tx_slave && tx_slave !=
>+		    rcu_access_pointer(bond->curr_active_slave)) {
>+			ip6hdr = ipv6_hdr(skb);
>+			if (ip6hdr->nexthdr == IPPROTO_ICMPV6) {
>+				hdr = icmp6_hdr(skb);
>+				if (__alb_determine_nd(hdr))
>+					alb_change_nd_option(skb, tx_slave->dev->dev_addr);
>+			}
>+		}
>+	}
>+}
>+
>+static bool alb_determine_nd(struct sk_buff *skb, struct bonding *bond)
>+{
>+	struct ipv6hdr *ip6hdr;
>+	struct icmp6hdr *hdr;
>+
>+	if (skb->protocol == htons(ETH_P_IPV6)) {
>+		ip6hdr = ipv6_hdr(skb);
>+		if (ip6hdr->nexthdr == IPPROTO_ICMPV6) {
>+			hdr = icmp6_hdr(skb);
>+			if (__alb_determine_nd(hdr))
>+				return true;
>+		}
>+	}
>+
>+	return false;
>+}
>+
> /************************ exported alb functions ************************/
> 
> int bond_alb_initialize(struct bonding *bond, int rlb_enabled)
>@@ -1350,6 +1483,9 @@ struct slave *bond_xmit_tlb_slave_get(struct bonding *bond,
> 		switch (skb->protocol) {
> 		case htons(ETH_P_IP):
> 		case htons(ETH_P_IPV6):
>+			if (alb_determine_nd(skb, bond))
>+				break;
>+
> 			hash_index = bond_xmit_hash(bond, skb);
> 			if (bond->params.tlb_dynamic_lb) {
> 				tx_slave = tlb_choose_channel(bond,
>@@ -1446,6 +1582,18 @@ struct slave *bond_xmit_alb_slave_get(struct bonding *bond,
> 			break;
> 		}
> 
>+		if (alb_determine_nd(skb, bond)) {
>+			u8 *lladdr;
>+
>+			lladdr = alb_get_lladdr(skb);
>+			if (lladdr) {
>+				if (!bond_slave_has_mac_rx(bond, lladdr)) {
>+					do_tx_balance = false;
>+					break;
>+				}
>+			}
>+		}
>+
> 		hash_start = (char *)&ip6hdr->daddr;
> 		hash_size = sizeof(ip6hdr->daddr);
> 		break;
>@@ -1489,6 +1637,7 @@ netdev_tx_t bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev)
> 	struct slave *tx_slave = NULL;
> 
> 	tx_slave = bond_xmit_alb_slave_get(bond, skb);
>+	alb_set_nd_option(skb, bond, tx_slave);
> 	return bond_do_alb_xmit(skb, bond, tx_slave);
> }
> 
>
>base-commit: 7a29b11da9651ef6a970e2f6bfd276f053aeb06a
>-- 
>2.27.0
>

---
	-Jay Vosburgh, jay.vosburgh@canonical.com

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v5] net: bonding: Add support for IPV6 ns/na to balance-alb/balance-tlb mode
  2021-12-27 20:36 ` Jay Vosburgh
@ 2021-12-28  3:01   ` 孙守鑫
  2022-01-04  8:35     ` 孙守鑫
  0 siblings, 1 reply; 7+ messages in thread
From: 孙守鑫 @ 2021-12-28  3:01 UTC (permalink / raw)
  To: Jay Vosburgh; +Cc: vfalico, andy, davem, kuba, netdev, linux-kernel, huyd12


在 2021/12/28 4:36, Jay Vosburgh 写道:
> Sun Shouxin <sunshouxin@chinatelecom.cn> wrote:
>
>> Since ipv6 neighbor solicitation and advertisement messages
>> isn't handled gracefully in bonding6 driver, we can see packet
>> drop due to inconsistency bewteen mac address in the option
>> message and source MAC .
>>
>> Another examples is ipv6 neighbor solicitation and advertisement
>> messages from VM via tap attached to host brighe, the src mac
>> mighe be changed through balance-alb mode, but it is not synced
>> with Link-layer address in the option message.
>>
>> The patch implements bond6's tx handle for ipv6 neighbor
>> solicitation and advertisement messages.
> 	I'm not sure what you've changed here for v5 as there's no
> changelog, but I believe the observed problems to be a transmit side
> effect (i.e., it is induced by the balance-tlb mode balancing of
> outgoing traffic).  As such, the tlb side will rebalance all of the
> traffic every ten seconds, so any MAC ND_OPT_*_LL_ADDR option
> assignments in the outgoing NS/NA datagrams will only be valid for that
> length of time, correct?


Yes,  MAC ND_OPT_*_LL_ADDR option assignments in the outgoing NS/NA
datagrams will only be valid for that length of time ,and then,
it will be inconsistensy in the next ten seconds.


> 	The topology diagram and example that you've removed from the
> commit log with v5 said, in part, that the issue arose because the
> LL_ADDR option MAC didn't match the actual source MAC.  Since tlb mode
> can reshuffle the flows every ten seconds, how did the proposed solution
> work reliably?


In function alb_set_nd_option, we will change the LL_ADDR option Mac
to the source Mac. This could work in this conditon.


>
> 	In any event, my real question is whether simply disabling tlb
> balancing for NS/NA datagrams will resolve the observed issues (i.e.,
> have bond_xmit_tlb_slave_get return NULL for IPv6 NS/NA datagrams).
> Doing so will cause all NS/NA traffic to egress through the active
> interface.  There's already a test in your logic to check for the
> tx_slave != bond->curr_active_slave, so presumably everything works
> correctly if the NS/NA goes out on the curr_active_slave.  If the "edit
> NS/NA datagrams" solution works even in the face of rebalance of
> traffic, then would simply assigning all NS/NA traffic to the
> curr_active_slave eliminate the problem?


Yes, assigning all Ns/Na traffic to the curr_active_slave can resolve the
difference between mac in the Ns/Na options with the source mac.
But this makes the rlb doesn't work in the alb mode,
one interface with bond6 will not receive any ingress packets.
It is mismatch Bond6 specification.


>
> 	-J
>
>> Suggested-by: Hu Yadi <huyd12@chinatelecom.cn>
>> Reported-by: kernel test robot <lkp@intel.com>
>> Signed-off-by: Sun Shouxin <sunshouxin@chinatelecom.cn>
>> ---
>> drivers/net/bonding/bond_alb.c | 149 +++++++++++++++++++++++++++++++++
>> 1 file changed, 149 insertions(+)
>>
>> diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c
>> index 533e476988f2..485e4863a365 100644
>> --- a/drivers/net/bonding/bond_alb.c
>> +++ b/drivers/net/bonding/bond_alb.c
>> @@ -22,6 +22,8 @@
>> #include <asm/byteorder.h>
>> #include <net/bonding.h>
>> #include <net/bond_alb.h>
>> +#include <net/ndisc.h>
>> +#include <net/ip6_checksum.h>
>>
>> static const u8 mac_v6_allmcast[ETH_ALEN + 2] __long_aligned = {
>> 	0x33, 0x33, 0x00, 0x00, 0x00, 0x01
>> @@ -1269,6 +1271,137 @@ static int alb_set_mac_address(struct bonding *bond, void *addr)
>> 	return res;
>> }
>>
>> +/*determine if the packet is NA or NS*/
>> +static bool __alb_determine_nd(struct icmp6hdr *hdr)
>> +{
>> +	if (hdr->icmp6_type == NDISC_NEIGHBOUR_ADVERTISEMENT ||
>> +	    hdr->icmp6_type == NDISC_NEIGHBOUR_SOLICITATION) {
>> +		return true;
>> +	}
>> +
>> +	return false;
>> +}
>> +
>> +static void alb_change_nd_option(struct sk_buff *skb, void *data)
>> +{
>> +	struct nd_msg *msg = (struct nd_msg *)skb_transport_header(skb);
>> +	struct nd_opt_hdr *nd_opt = (struct nd_opt_hdr *)msg->opt;
>> +	struct net_device *dev = skb->dev;
>> +	struct icmp6hdr *icmp6h = icmp6_hdr(skb);
>> +	struct ipv6hdr *ip6hdr = ipv6_hdr(skb);
>> +	u8 *lladdr = NULL;
>> +	u32 ndoptlen = skb_tail_pointer(skb) - (skb_transport_header(skb) +
>> +				offsetof(struct nd_msg, opt));
>> +
>> +	while (ndoptlen) {
>> +		int l;
>> +
>> +		switch (nd_opt->nd_opt_type) {
>> +		case ND_OPT_SOURCE_LL_ADDR:
>> +		case ND_OPT_TARGET_LL_ADDR:
>> +			lladdr = ndisc_opt_addr_data(nd_opt, dev);
>> +			break;
>> +
>> +		default:
>> +			lladdr = NULL;
>> +			break;
>> +		}
>> +
>> +		l = nd_opt->nd_opt_len << 3;
>> +
>> +		if (ndoptlen < l || l == 0)
>> +			return;
>> +
>> +		if (lladdr) {
>> +			memcpy(lladdr, data, dev->addr_len);
>> +			icmp6h->icmp6_cksum = 0;
>> +
>> +			icmp6h->icmp6_cksum = csum_ipv6_magic(&ip6hdr->saddr,
>> +							      &ip6hdr->daddr,
>> +						ntohs(ip6hdr->payload_len),
>> +						IPPROTO_ICMPV6,
>> +						csum_partial(icmp6h,
>> +							     ntohs(ip6hdr->payload_len), 0));
>> +			return;
>> +		}
>> +		ndoptlen -= l;
>> +		nd_opt = ((void *)nd_opt) + l;
>> +	}
>> +}
>> +
>> +static u8 *alb_get_lladdr(struct sk_buff *skb)
>> +{
>> +	struct nd_msg *msg = (struct nd_msg *)skb_transport_header(skb);
>> +	struct nd_opt_hdr *nd_opt = (struct nd_opt_hdr *)msg->opt;
>> +	struct net_device *dev = skb->dev;
>> +	u8 *lladdr = NULL;
>> +	u32 ndoptlen = skb_tail_pointer(skb) - (skb_transport_header(skb) +
>> +				offsetof(struct nd_msg, opt));
>> +
>> +	while (ndoptlen) {
>> +		int l;
>> +
>> +		switch (nd_opt->nd_opt_type) {
>> +		case ND_OPT_SOURCE_LL_ADDR:
>> +		case ND_OPT_TARGET_LL_ADDR:
>> +			lladdr = ndisc_opt_addr_data(nd_opt, dev);
>> +			break;
>> +
>> +		default:
>> +			break;
>> +		}
>> +
>> +		l = nd_opt->nd_opt_len << 3;
>> +
>> +		if (ndoptlen < l || l == 0)
>> +			return NULL;
>> +
>> +		if (lladdr)
>> +			return lladdr;
>> +
>> +		ndoptlen -= l;
>> +		nd_opt = ((void *)nd_opt) + l;
>> +	}
>> +
>> +	return lladdr;
>> +}
>> +
>> +static void alb_set_nd_option(struct sk_buff *skb, struct bonding *bond,
>> +			      struct slave *tx_slave)
>> +{
>> +	struct ipv6hdr *ip6hdr;
>> +	struct icmp6hdr *hdr;
>> +
>> +	if (skb->protocol == htons(ETH_P_IPV6)) {
>> +		if (tx_slave && tx_slave !=
>> +		    rcu_access_pointer(bond->curr_active_slave)) {
>> +			ip6hdr = ipv6_hdr(skb);
>> +			if (ip6hdr->nexthdr == IPPROTO_ICMPV6) {
>> +				hdr = icmp6_hdr(skb);
>> +				if (__alb_determine_nd(hdr))
>> +					alb_change_nd_option(skb, tx_slave->dev->dev_addr);
>> +			}
>> +		}
>> +	}
>> +}
>> +
>> +static bool alb_determine_nd(struct sk_buff *skb, struct bonding *bond)
>> +{
>> +	struct ipv6hdr *ip6hdr;
>> +	struct icmp6hdr *hdr;
>> +
>> +	if (skb->protocol == htons(ETH_P_IPV6)) {
>> +		ip6hdr = ipv6_hdr(skb);
>> +		if (ip6hdr->nexthdr == IPPROTO_ICMPV6) {
>> +			hdr = icmp6_hdr(skb);
>> +			if (__alb_determine_nd(hdr))
>> +				return true;
>> +		}
>> +	}
>> +
>> +	return false;
>> +}
>> +
>> /************************ exported alb functions ************************/
>>
>> int bond_alb_initialize(struct bonding *bond, int rlb_enabled)
>> @@ -1350,6 +1483,9 @@ struct slave *bond_xmit_tlb_slave_get(struct bonding *bond,
>> 		switch (skb->protocol) {
>> 		case htons(ETH_P_IP):
>> 		case htons(ETH_P_IPV6):
>> +			if (alb_determine_nd(skb, bond))
>> +				break;
>> +
>> 			hash_index = bond_xmit_hash(bond, skb);
>> 			if (bond->params.tlb_dynamic_lb) {
>> 				tx_slave = tlb_choose_channel(bond,
>> @@ -1446,6 +1582,18 @@ struct slave *bond_xmit_alb_slave_get(struct bonding *bond,
>> 			break;
>> 		}
>>
>> +		if (alb_determine_nd(skb, bond)) {
>> +			u8 *lladdr;
>> +
>> +			lladdr = alb_get_lladdr(skb);
>> +			if (lladdr) {
>> +				if (!bond_slave_has_mac_rx(bond, lladdr)) {
>> +					do_tx_balance = false;
>> +					break;
>> +				}
>> +			}
>> +		}
>> +
>> 		hash_start = (char *)&ip6hdr->daddr;
>> 		hash_size = sizeof(ip6hdr->daddr);
>> 		break;
>> @@ -1489,6 +1637,7 @@ netdev_tx_t bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev)
>> 	struct slave *tx_slave = NULL;
>>
>> 	tx_slave = bond_xmit_alb_slave_get(bond, skb);
>> +	alb_set_nd_option(skb, bond, tx_slave);
>> 	return bond_do_alb_xmit(skb, bond, tx_slave);
>> }
>>
>>
>> base-commit: 7a29b11da9651ef6a970e2f6bfd276f053aeb06a
>> -- 
>> 2.27.0
>>
> ---
> 	-Jay Vosburgh, jay.vosburgh@canonical.com

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v5] net: bonding: Add support for IPV6 ns/na to balance-alb/balance-tlb mode
  2021-12-28  3:01   ` 孙守鑫
@ 2022-01-04  8:35     ` 孙守鑫
  2022-01-07  8:13       ` 孙守鑫
  0 siblings, 1 reply; 7+ messages in thread
From: 孙守鑫 @ 2022-01-04  8:35 UTC (permalink / raw)
  To: Jay Vosburgh; +Cc: vfalico, andy, davem, kuba, netdev, linux-kernel, huyd12


Any comments will be appreciated.


在 2021/12/28 11:01, 孙守鑫 写道:
>
> 在 2021/12/28 4:36, Jay Vosburgh 写道:
>> Sun Shouxin <sunshouxin@chinatelecom.cn> wrote:
>>
>>> Since ipv6 neighbor solicitation and advertisement messages
>>> isn't handled gracefully in bonding6 driver, we can see packet
>>> drop due to inconsistency bewteen mac address in the option
>>> message and source MAC .
>>>
>>> Another examples is ipv6 neighbor solicitation and advertisement
>>> messages from VM via tap attached to host brighe, the src mac
>>> mighe be changed through balance-alb mode, but it is not synced
>>> with Link-layer address in the option message.
>>>
>>> The patch implements bond6's tx handle for ipv6 neighbor
>>> solicitation and advertisement messages.
>>     I'm not sure what you've changed here for v5 as there's no
>> changelog, but I believe the observed problems to be a transmit side
>> effect (i.e., it is induced by the balance-tlb mode balancing of
>> outgoing traffic).  As such, the tlb side will rebalance all of the
>> traffic every ten seconds, so any MAC ND_OPT_*_LL_ADDR option
>> assignments in the outgoing NS/NA datagrams will only be valid for that
>> length of time, correct?
>
>
> Yes,  MAC ND_OPT_*_LL_ADDR option assignments in the outgoing NS/NA
> datagrams will only be valid for that length of time ,and then,
> it will be inconsistensy in the next ten seconds.
>
>
>>     The topology diagram and example that you've removed from the
>> commit log with v5 said, in part, that the issue arose because the
>> LL_ADDR option MAC didn't match the actual source MAC.  Since tlb mode
>> can reshuffle the flows every ten seconds, how did the proposed solution
>> work reliably?
>
>
> In function alb_set_nd_option, we will change the LL_ADDR option Mac
> to the source Mac. This could work in this conditon.
>
>
>>
>>     In any event, my real question is whether simply disabling tlb
>> balancing for NS/NA datagrams will resolve the observed issues (i.e.,
>> have bond_xmit_tlb_slave_get return NULL for IPv6 NS/NA datagrams).
>> Doing so will cause all NS/NA traffic to egress through the active
>> interface.  There's already a test in your logic to check for the
>> tx_slave != bond->curr_active_slave, so presumably everything works
>> correctly if the NS/NA goes out on the curr_active_slave.  If the "edit
>> NS/NA datagrams" solution works even in the face of rebalance of
>> traffic, then would simply assigning all NS/NA traffic to the
>> curr_active_slave eliminate the problem?
>
>
> Yes, assigning all Ns/Na traffic to the curr_active_slave can resolve the
> difference between mac in the Ns/Na options with the source mac.
> But this makes the rlb doesn't work in the alb mode,
> one interface with bond6 will not receive any ingress packets.
> It is mismatch Bond6 specification.
>
>
>>
>>     -J
>>
>>> Suggested-by: Hu Yadi <huyd12@chinatelecom.cn>
>>> Reported-by: kernel test robot <lkp@intel.com>
>>> Signed-off-by: Sun Shouxin <sunshouxin@chinatelecom.cn>
>>> ---
>>> drivers/net/bonding/bond_alb.c | 149 +++++++++++++++++++++++++++++++++
>>> 1 file changed, 149 insertions(+)
>>>
>>> diff --git a/drivers/net/bonding/bond_alb.c 
>>> b/drivers/net/bonding/bond_alb.c
>>> index 533e476988f2..485e4863a365 100644
>>> --- a/drivers/net/bonding/bond_alb.c
>>> +++ b/drivers/net/bonding/bond_alb.c
>>> @@ -22,6 +22,8 @@
>>> #include <asm/byteorder.h>
>>> #include <net/bonding.h>
>>> #include <net/bond_alb.h>
>>> +#include <net/ndisc.h>
>>> +#include <net/ip6_checksum.h>
>>>
>>> static const u8 mac_v6_allmcast[ETH_ALEN + 2] __long_aligned = {
>>>     0x33, 0x33, 0x00, 0x00, 0x00, 0x01
>>> @@ -1269,6 +1271,137 @@ static int alb_set_mac_address(struct 
>>> bonding *bond, void *addr)
>>>     return res;
>>> }
>>>
>>> +/*determine if the packet is NA or NS*/
>>> +static bool __alb_determine_nd(struct icmp6hdr *hdr)
>>> +{
>>> +    if (hdr->icmp6_type == NDISC_NEIGHBOUR_ADVERTISEMENT ||
>>> +        hdr->icmp6_type == NDISC_NEIGHBOUR_SOLICITATION) {
>>> +        return true;
>>> +    }
>>> +
>>> +    return false;
>>> +}
>>> +
>>> +static void alb_change_nd_option(struct sk_buff *skb, void *data)
>>> +{
>>> +    struct nd_msg *msg = (struct nd_msg *)skb_transport_header(skb);
>>> +    struct nd_opt_hdr *nd_opt = (struct nd_opt_hdr *)msg->opt;
>>> +    struct net_device *dev = skb->dev;
>>> +    struct icmp6hdr *icmp6h = icmp6_hdr(skb);
>>> +    struct ipv6hdr *ip6hdr = ipv6_hdr(skb);
>>> +    u8 *lladdr = NULL;
>>> +    u32 ndoptlen = skb_tail_pointer(skb) - 
>>> (skb_transport_header(skb) +
>>> +                offsetof(struct nd_msg, opt));
>>> +
>>> +    while (ndoptlen) {
>>> +        int l;
>>> +
>>> +        switch (nd_opt->nd_opt_type) {
>>> +        case ND_OPT_SOURCE_LL_ADDR:
>>> +        case ND_OPT_TARGET_LL_ADDR:
>>> +            lladdr = ndisc_opt_addr_data(nd_opt, dev);
>>> +            break;
>>> +
>>> +        default:
>>> +            lladdr = NULL;
>>> +            break;
>>> +        }
>>> +
>>> +        l = nd_opt->nd_opt_len << 3;
>>> +
>>> +        if (ndoptlen < l || l == 0)
>>> +            return;
>>> +
>>> +        if (lladdr) {
>>> +            memcpy(lladdr, data, dev->addr_len);
>>> +            icmp6h->icmp6_cksum = 0;
>>> +
>>> +            icmp6h->icmp6_cksum = csum_ipv6_magic(&ip6hdr->saddr,
>>> +                                  &ip6hdr->daddr,
>>> +                        ntohs(ip6hdr->payload_len),
>>> +                        IPPROTO_ICMPV6,
>>> +                        csum_partial(icmp6h,
>>> + ntohs(ip6hdr->payload_len), 0));
>>> +            return;
>>> +        }
>>> +        ndoptlen -= l;
>>> +        nd_opt = ((void *)nd_opt) + l;
>>> +    }
>>> +}
>>> +
>>> +static u8 *alb_get_lladdr(struct sk_buff *skb)
>>> +{
>>> +    struct nd_msg *msg = (struct nd_msg *)skb_transport_header(skb);
>>> +    struct nd_opt_hdr *nd_opt = (struct nd_opt_hdr *)msg->opt;
>>> +    struct net_device *dev = skb->dev;
>>> +    u8 *lladdr = NULL;
>>> +    u32 ndoptlen = skb_tail_pointer(skb) - 
>>> (skb_transport_header(skb) +
>>> +                offsetof(struct nd_msg, opt));
>>> +
>>> +    while (ndoptlen) {
>>> +        int l;
>>> +
>>> +        switch (nd_opt->nd_opt_type) {
>>> +        case ND_OPT_SOURCE_LL_ADDR:
>>> +        case ND_OPT_TARGET_LL_ADDR:
>>> +            lladdr = ndisc_opt_addr_data(nd_opt, dev);
>>> +            break;
>>> +
>>> +        default:
>>> +            break;
>>> +        }
>>> +
>>> +        l = nd_opt->nd_opt_len << 3;
>>> +
>>> +        if (ndoptlen < l || l == 0)
>>> +            return NULL;
>>> +
>>> +        if (lladdr)
>>> +            return lladdr;
>>> +
>>> +        ndoptlen -= l;
>>> +        nd_opt = ((void *)nd_opt) + l;
>>> +    }
>>> +
>>> +    return lladdr;
>>> +}
>>> +
>>> +static void alb_set_nd_option(struct sk_buff *skb, struct bonding 
>>> *bond,
>>> +                  struct slave *tx_slave)
>>> +{
>>> +    struct ipv6hdr *ip6hdr;
>>> +    struct icmp6hdr *hdr;
>>> +
>>> +    if (skb->protocol == htons(ETH_P_IPV6)) {
>>> +        if (tx_slave && tx_slave !=
>>> +            rcu_access_pointer(bond->curr_active_slave)) {
>>> +            ip6hdr = ipv6_hdr(skb);
>>> +            if (ip6hdr->nexthdr == IPPROTO_ICMPV6) {
>>> +                hdr = icmp6_hdr(skb);
>>> +                if (__alb_determine_nd(hdr))
>>> +                    alb_change_nd_option(skb, 
>>> tx_slave->dev->dev_addr);
>>> +            }
>>> +        }
>>> +    }
>>> +}
>>> +
>>> +static bool alb_determine_nd(struct sk_buff *skb, struct bonding 
>>> *bond)
>>> +{
>>> +    struct ipv6hdr *ip6hdr;
>>> +    struct icmp6hdr *hdr;
>>> +
>>> +    if (skb->protocol == htons(ETH_P_IPV6)) {
>>> +        ip6hdr = ipv6_hdr(skb);
>>> +        if (ip6hdr->nexthdr == IPPROTO_ICMPV6) {
>>> +            hdr = icmp6_hdr(skb);
>>> +            if (__alb_determine_nd(hdr))
>>> +                return true;
>>> +        }
>>> +    }
>>> +
>>> +    return false;
>>> +}
>>> +
>>> /************************ exported alb functions 
>>> ************************/
>>>
>>> int bond_alb_initialize(struct bonding *bond, int rlb_enabled)
>>> @@ -1350,6 +1483,9 @@ struct slave *bond_xmit_tlb_slave_get(struct 
>>> bonding *bond,
>>>         switch (skb->protocol) {
>>>         case htons(ETH_P_IP):
>>>         case htons(ETH_P_IPV6):
>>> +            if (alb_determine_nd(skb, bond))
>>> +                break;
>>> +
>>>             hash_index = bond_xmit_hash(bond, skb);
>>>             if (bond->params.tlb_dynamic_lb) {
>>>                 tx_slave = tlb_choose_channel(bond,
>>> @@ -1446,6 +1582,18 @@ struct slave *bond_xmit_alb_slave_get(struct 
>>> bonding *bond,
>>>             break;
>>>         }
>>>
>>> +        if (alb_determine_nd(skb, bond)) {
>>> +            u8 *lladdr;
>>> +
>>> +            lladdr = alb_get_lladdr(skb);
>>> +            if (lladdr) {
>>> +                if (!bond_slave_has_mac_rx(bond, lladdr)) {
>>> +                    do_tx_balance = false;
>>> +                    break;
>>> +                }
>>> +            }
>>> +        }
>>> +
>>>         hash_start = (char *)&ip6hdr->daddr;
>>>         hash_size = sizeof(ip6hdr->daddr);
>>>         break;
>>> @@ -1489,6 +1637,7 @@ netdev_tx_t bond_alb_xmit(struct sk_buff *skb, 
>>> struct net_device *bond_dev)
>>>     struct slave *tx_slave = NULL;
>>>
>>>     tx_slave = bond_xmit_alb_slave_get(bond, skb);
>>> +    alb_set_nd_option(skb, bond, tx_slave);
>>>     return bond_do_alb_xmit(skb, bond, tx_slave);
>>> }
>>>
>>>
>>> base-commit: 7a29b11da9651ef6a970e2f6bfd276f053aeb06a
>>> -- 
>>> 2.27.0
>>>
>> ---
>>     -Jay Vosburgh, jay.vosburgh@canonical.com

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v5] net: bonding: Add support for IPV6 ns/na to balance-alb/balance-tlb mode
  2022-01-04  8:35     ` 孙守鑫
@ 2022-01-07  8:13       ` 孙守鑫
  0 siblings, 0 replies; 7+ messages in thread
From: 孙守鑫 @ 2022-01-07  8:13 UTC (permalink / raw)
  To: Jay Vosburgh; +Cc: vfalico, andy, davem, kuba, netdev, linux-kernel, huyd12


Hi guys

Any comment?

thanks a lot.


在 2022/1/4 16:35, 孙守鑫 写道:
>
> Any comments will be appreciated.
>
>
> 在 2021/12/28 11:01, 孙守鑫 写道:
>>
>> 在 2021/12/28 4:36, Jay Vosburgh 写道:
>>> Sun Shouxin <sunshouxin@chinatelecom.cn> wrote:
>>>
>>>> Since ipv6 neighbor solicitation and advertisement messages
>>>> isn't handled gracefully in bonding6 driver, we can see packet
>>>> drop due to inconsistency bewteen mac address in the option
>>>> message and source MAC .
>>>>
>>>> Another examples is ipv6 neighbor solicitation and advertisement
>>>> messages from VM via tap attached to host brighe, the src mac
>>>> mighe be changed through balance-alb mode, but it is not synced
>>>> with Link-layer address in the option message.
>>>>
>>>> The patch implements bond6's tx handle for ipv6 neighbor
>>>> solicitation and advertisement messages.
>>>     I'm not sure what you've changed here for v5 as there's no
>>> changelog, but I believe the observed problems to be a transmit side
>>> effect (i.e., it is induced by the balance-tlb mode balancing of
>>> outgoing traffic).  As such, the tlb side will rebalance all of the
>>> traffic every ten seconds, so any MAC ND_OPT_*_LL_ADDR option
>>> assignments in the outgoing NS/NA datagrams will only be valid for that
>>> length of time, correct?
>>
>>
>> Yes,  MAC ND_OPT_*_LL_ADDR option assignments in the outgoing NS/NA
>> datagrams will only be valid for that length of time ,and then,
>> it will be inconsistensy in the next ten seconds.
>>
>>
>>>     The topology diagram and example that you've removed from the
>>> commit log with v5 said, in part, that the issue arose because the
>>> LL_ADDR option MAC didn't match the actual source MAC.  Since tlb mode
>>> can reshuffle the flows every ten seconds, how did the proposed 
>>> solution
>>> work reliably?
>>
>>
>> In function alb_set_nd_option, we will change the LL_ADDR option Mac
>> to the source Mac. This could work in this conditon.
>>
>>
>>>
>>>     In any event, my real question is whether simply disabling tlb
>>> balancing for NS/NA datagrams will resolve the observed issues (i.e.,
>>> have bond_xmit_tlb_slave_get return NULL for IPv6 NS/NA datagrams).
>>> Doing so will cause all NS/NA traffic to egress through the active
>>> interface.  There's already a test in your logic to check for the
>>> tx_slave != bond->curr_active_slave, so presumably everything works
>>> correctly if the NS/NA goes out on the curr_active_slave.  If the "edit
>>> NS/NA datagrams" solution works even in the face of rebalance of
>>> traffic, then would simply assigning all NS/NA traffic to the
>>> curr_active_slave eliminate the problem?
>>
>>
>> Yes, assigning all Ns/Na traffic to the curr_active_slave can resolve 
>> the
>> difference between mac in the Ns/Na options with the source mac.
>> But this makes the rlb doesn't work in the alb mode,
>> one interface with bond6 will not receive any ingress packets.
>> It is mismatch Bond6 specification.
>>
>>
>>>
>>>     -J
>>>
>>>> Suggested-by: Hu Yadi <huyd12@chinatelecom.cn>
>>>> Reported-by: kernel test robot <lkp@intel.com>
>>>> Signed-off-by: Sun Shouxin <sunshouxin@chinatelecom.cn>
>>>> ---
>>>> drivers/net/bonding/bond_alb.c | 149 +++++++++++++++++++++++++++++++++
>>>> 1 file changed, 149 insertions(+)
>>>>
>>>> diff --git a/drivers/net/bonding/bond_alb.c 
>>>> b/drivers/net/bonding/bond_alb.c
>>>> index 533e476988f2..485e4863a365 100644
>>>> --- a/drivers/net/bonding/bond_alb.c
>>>> +++ b/drivers/net/bonding/bond_alb.c
>>>> @@ -22,6 +22,8 @@
>>>> #include <asm/byteorder.h>
>>>> #include <net/bonding.h>
>>>> #include <net/bond_alb.h>
>>>> +#include <net/ndisc.h>
>>>> +#include <net/ip6_checksum.h>
>>>>
>>>> static const u8 mac_v6_allmcast[ETH_ALEN + 2] __long_aligned = {
>>>>     0x33, 0x33, 0x00, 0x00, 0x00, 0x01
>>>> @@ -1269,6 +1271,137 @@ static int alb_set_mac_address(struct 
>>>> bonding *bond, void *addr)
>>>>     return res;
>>>> }
>>>>
>>>> +/*determine if the packet is NA or NS*/
>>>> +static bool __alb_determine_nd(struct icmp6hdr *hdr)
>>>> +{
>>>> +    if (hdr->icmp6_type == NDISC_NEIGHBOUR_ADVERTISEMENT ||
>>>> +        hdr->icmp6_type == NDISC_NEIGHBOUR_SOLICITATION) {
>>>> +        return true;
>>>> +    }
>>>> +
>>>> +    return false;
>>>> +}
>>>> +
>>>> +static void alb_change_nd_option(struct sk_buff *skb, void *data)
>>>> +{
>>>> +    struct nd_msg *msg = (struct nd_msg *)skb_transport_header(skb);
>>>> +    struct nd_opt_hdr *nd_opt = (struct nd_opt_hdr *)msg->opt;
>>>> +    struct net_device *dev = skb->dev;
>>>> +    struct icmp6hdr *icmp6h = icmp6_hdr(skb);
>>>> +    struct ipv6hdr *ip6hdr = ipv6_hdr(skb);
>>>> +    u8 *lladdr = NULL;
>>>> +    u32 ndoptlen = skb_tail_pointer(skb) - 
>>>> (skb_transport_header(skb) +
>>>> +                offsetof(struct nd_msg, opt));
>>>> +
>>>> +    while (ndoptlen) {
>>>> +        int l;
>>>> +
>>>> +        switch (nd_opt->nd_opt_type) {
>>>> +        case ND_OPT_SOURCE_LL_ADDR:
>>>> +        case ND_OPT_TARGET_LL_ADDR:
>>>> +            lladdr = ndisc_opt_addr_data(nd_opt, dev);
>>>> +            break;
>>>> +
>>>> +        default:
>>>> +            lladdr = NULL;
>>>> +            break;
>>>> +        }
>>>> +
>>>> +        l = nd_opt->nd_opt_len << 3;
>>>> +
>>>> +        if (ndoptlen < l || l == 0)
>>>> +            return;
>>>> +
>>>> +        if (lladdr) {
>>>> +            memcpy(lladdr, data, dev->addr_len);
>>>> +            icmp6h->icmp6_cksum = 0;
>>>> +
>>>> +            icmp6h->icmp6_cksum = csum_ipv6_magic(&ip6hdr->saddr,
>>>> +                                  &ip6hdr->daddr,
>>>> +                        ntohs(ip6hdr->payload_len),
>>>> +                        IPPROTO_ICMPV6,
>>>> +                        csum_partial(icmp6h,
>>>> + ntohs(ip6hdr->payload_len), 0));
>>>> +            return;
>>>> +        }
>>>> +        ndoptlen -= l;
>>>> +        nd_opt = ((void *)nd_opt) + l;
>>>> +    }
>>>> +}
>>>> +
>>>> +static u8 *alb_get_lladdr(struct sk_buff *skb)
>>>> +{
>>>> +    struct nd_msg *msg = (struct nd_msg *)skb_transport_header(skb);
>>>> +    struct nd_opt_hdr *nd_opt = (struct nd_opt_hdr *)msg->opt;
>>>> +    struct net_device *dev = skb->dev;
>>>> +    u8 *lladdr = NULL;
>>>> +    u32 ndoptlen = skb_tail_pointer(skb) - 
>>>> (skb_transport_header(skb) +
>>>> +                offsetof(struct nd_msg, opt));
>>>> +
>>>> +    while (ndoptlen) {
>>>> +        int l;
>>>> +
>>>> +        switch (nd_opt->nd_opt_type) {
>>>> +        case ND_OPT_SOURCE_LL_ADDR:
>>>> +        case ND_OPT_TARGET_LL_ADDR:
>>>> +            lladdr = ndisc_opt_addr_data(nd_opt, dev);
>>>> +            break;
>>>> +
>>>> +        default:
>>>> +            break;
>>>> +        }
>>>> +
>>>> +        l = nd_opt->nd_opt_len << 3;
>>>> +
>>>> +        if (ndoptlen < l || l == 0)
>>>> +            return NULL;
>>>> +
>>>> +        if (lladdr)
>>>> +            return lladdr;
>>>> +
>>>> +        ndoptlen -= l;
>>>> +        nd_opt = ((void *)nd_opt) + l;
>>>> +    }
>>>> +
>>>> +    return lladdr;
>>>> +}
>>>> +
>>>> +static void alb_set_nd_option(struct sk_buff *skb, struct bonding 
>>>> *bond,
>>>> +                  struct slave *tx_slave)
>>>> +{
>>>> +    struct ipv6hdr *ip6hdr;
>>>> +    struct icmp6hdr *hdr;
>>>> +
>>>> +    if (skb->protocol == htons(ETH_P_IPV6)) {
>>>> +        if (tx_slave && tx_slave !=
>>>> +            rcu_access_pointer(bond->curr_active_slave)) {
>>>> +            ip6hdr = ipv6_hdr(skb);
>>>> +            if (ip6hdr->nexthdr == IPPROTO_ICMPV6) {
>>>> +                hdr = icmp6_hdr(skb);
>>>> +                if (__alb_determine_nd(hdr))
>>>> +                    alb_change_nd_option(skb, 
>>>> tx_slave->dev->dev_addr);
>>>> +            }
>>>> +        }
>>>> +    }
>>>> +}
>>>> +
>>>> +static bool alb_determine_nd(struct sk_buff *skb, struct bonding 
>>>> *bond)
>>>> +{
>>>> +    struct ipv6hdr *ip6hdr;
>>>> +    struct icmp6hdr *hdr;
>>>> +
>>>> +    if (skb->protocol == htons(ETH_P_IPV6)) {
>>>> +        ip6hdr = ipv6_hdr(skb);
>>>> +        if (ip6hdr->nexthdr == IPPROTO_ICMPV6) {
>>>> +            hdr = icmp6_hdr(skb);
>>>> +            if (__alb_determine_nd(hdr))
>>>> +                return true;
>>>> +        }
>>>> +    }
>>>> +
>>>> +    return false;
>>>> +}
>>>> +
>>>> /************************ exported alb functions 
>>>> ************************/
>>>>
>>>> int bond_alb_initialize(struct bonding *bond, int rlb_enabled)
>>>> @@ -1350,6 +1483,9 @@ struct slave *bond_xmit_tlb_slave_get(struct 
>>>> bonding *bond,
>>>>         switch (skb->protocol) {
>>>>         case htons(ETH_P_IP):
>>>>         case htons(ETH_P_IPV6):
>>>> +            if (alb_determine_nd(skb, bond))
>>>> +                break;
>>>> +
>>>>             hash_index = bond_xmit_hash(bond, skb);
>>>>             if (bond->params.tlb_dynamic_lb) {
>>>>                 tx_slave = tlb_choose_channel(bond,
>>>> @@ -1446,6 +1582,18 @@ struct slave *bond_xmit_alb_slave_get(struct 
>>>> bonding *bond,
>>>>             break;
>>>>         }
>>>>
>>>> +        if (alb_determine_nd(skb, bond)) {
>>>> +            u8 *lladdr;
>>>> +
>>>> +            lladdr = alb_get_lladdr(skb);
>>>> +            if (lladdr) {
>>>> +                if (!bond_slave_has_mac_rx(bond, lladdr)) {
>>>> +                    do_tx_balance = false;
>>>> +                    break;
>>>> +                }
>>>> +            }
>>>> +        }
>>>> +
>>>>         hash_start = (char *)&ip6hdr->daddr;
>>>>         hash_size = sizeof(ip6hdr->daddr);
>>>>         break;
>>>> @@ -1489,6 +1637,7 @@ netdev_tx_t bond_alb_xmit(struct sk_buff 
>>>> *skb, struct net_device *bond_dev)
>>>>     struct slave *tx_slave = NULL;
>>>>
>>>>     tx_slave = bond_xmit_alb_slave_get(bond, skb);
>>>> +    alb_set_nd_option(skb, bond, tx_slave);
>>>>     return bond_do_alb_xmit(skb, bond, tx_slave);
>>>> }
>>>>
>>>>
>>>> base-commit: 7a29b11da9651ef6a970e2f6bfd276f053aeb06a
>>>> -- 
>>>> 2.27.0
>>>>
>>> ---
>>>     -Jay Vosburgh, jay.vosburgh@canonical.com

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2022-01-07  8:13 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-12-24 14:25 [PATCH v5] net: bonding: Add support for IPV6 ns/na to balance-alb/balance-tlb mode Sun Shouxin
2021-12-24 22:57 ` Jakub Kicinski
2021-12-27 10:06 ` 回复: " huyd12
2021-12-27 20:36 ` Jay Vosburgh
2021-12-28  3:01   ` 孙守鑫
2022-01-04  8:35     ` 孙守鑫
2022-01-07  8:13       ` 孙守鑫

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).