All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] net: bonding: Add support for IPV6 ns/na
@ 2021-12-09  6:50 Sun Shouxin
  2021-12-09 11:57   ` kernel test robot
  2021-12-09 20:22 ` Jay Vosburgh
  0 siblings, 2 replies; 5+ messages in thread
From: Sun Shouxin @ 2021-12-09  6:50 UTC (permalink / raw)
  To: j.vosburgh, vfalico, andy, davem, kuba; +Cc: netdev, linux-kernel, huyd12

Since ipv6 neighbor solicitation and advertisement messages
aren't handled gracefully in the bonding6 driver, we can see packet
drops due to inconsistency between the MAC address in the option
message and the source MAC.

Another example is ipv6 neighbor solicitation and advertisement
messages from a VM via a tap attached to the host bridge: the src mac
might be changed by balance-alb mode, but it is not synced
with the Link-layer address in the option message.

The patch implements bond6's tx handle for ipv6 neighbor
solicitation and advertisement messages.

Suggested-by: Hu Yadi <huyd12@chinatelecom.cn>
Signed-off-by: Sun Shouxin <sunshouxin@chinatelecom.cn>
---
 drivers/net/bonding/bond_alb.c | 127 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 127 insertions(+)

diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c
index 2ec8e01..01566ba 100644
--- a/drivers/net/bonding/bond_alb.c
+++ b/drivers/net/bonding/bond_alb.c
@@ -22,6 +22,7 @@
 #include <asm/byteorder.h>
 #include <net/bonding.h>
 #include <net/bond_alb.h>
+#include <net/ndisc.h>
 
 static const u8 mac_v6_allmcast[ETH_ALEN + 2] __long_aligned = {
 	0x33, 0x33, 0x00, 0x00, 0x00, 0x01
@@ -1269,6 +1270,112 @@ static int alb_set_mac_address(struct bonding *bond, void *addr)
 	return res;
 }
 
+static void alb_change_nd_option(struct sk_buff *skb, void *data)
+{
+	struct nd_msg *msg = (struct nd_msg *)skb_transport_header(skb);
+	struct nd_opt_hdr *nd_opt = (struct nd_opt_hdr *)msg->opt;
+	struct net_device *dev = skb->dev;
+	struct icmp6hdr *icmp6h = icmp6_hdr(skb);
+	struct ipv6hdr *ip6hdr = ipv6_hdr(skb);
+	u8 *lladdr = NULL;
+	u32 ndoptlen = skb_tail_pointer(skb) - (skb_transport_header(skb) +
+				offsetof(struct nd_msg, opt));
+
+	while (ndoptlen) {
+		int l;
+
+		switch (nd_opt->nd_opt_type) {
+		case ND_OPT_SOURCE_LL_ADDR:
+		case ND_OPT_TARGET_LL_ADDR:
+		lladdr = ndisc_opt_addr_data(nd_opt, dev);
+		break;
+
+		default:
+		break;
+		}
+
+		l = nd_opt->nd_opt_len << 3;
+
+		if (ndoptlen < l || l == 0)
+			return;
+
+		if (lladdr) {
+			memcpy(lladdr, data, dev->addr_len);
+			lladdr = NULL;
+			icmp6h->icmp6_cksum = 0;
+
+			icmp6h->icmp6_cksum = csum_ipv6_magic(&ip6hdr->saddr,
+							      &ip6hdr->daddr,
+						ntohs(ip6hdr->payload_len),
+						IPPROTO_ICMPV6,
+						csum_partial(icmp6h,
+							     ntohs(ip6hdr->payload_len), 0));
+			lladdr = NULL;
+		}
+		ndoptlen -= l;
+		nd_opt = ((void *)nd_opt) + l;
+	}
+}
+
+static u8 *alb_get_lladdr(struct sk_buff *skb)
+{
+	struct nd_msg *msg = (struct nd_msg *)skb_transport_header(skb);
+	struct nd_opt_hdr *nd_opt = (struct nd_opt_hdr *)msg->opt;
+	struct net_device *dev = skb->dev;
+	u8 *lladdr = NULL;
+	u32 ndoptlen = skb_tail_pointer(skb) - (skb_transport_header(skb) +
+				offsetof(struct nd_msg, opt));
+
+	while (ndoptlen) {
+		int l;
+
+		switch (nd_opt->nd_opt_type) {
+		case ND_OPT_SOURCE_LL_ADDR:
+		case ND_OPT_TARGET_LL_ADDR:
+			lladdr = ndisc_opt_addr_data(nd_opt, dev);
+			break;
+
+		default:
+			break;
+		}
+
+		l = nd_opt->nd_opt_len << 3;
+
+		if (ndoptlen < l || l == 0)
+			return lladdr;
+
+		if (lladdr)
+			return lladdr;
+
+		ndoptlen -= l;
+		nd_opt = ((void *)nd_opt) + l;
+	}
+
+	return lladdr;
+}
+
+static void alb_set_nd_option(struct sk_buff *skb, struct bonding *bond,
+			      struct slave *tx_slave)
+{
+	struct ipv6hdr *ip6hdr;
+	struct icmp6hdr *hdr = NULL;
+
+	if (tx_slave && tx_slave != rcu_access_pointer(bond->curr_active_slave)) {
+		if (ntohs(skb->protocol) == ETH_P_IPV6) {
+			ip6hdr = ipv6_hdr(skb);
+			if (ip6hdr->nexthdr == IPPROTO_ICMPV6) {
+				hdr = icmp6_hdr(skb);
+				if (hdr->icmp6_type ==
+				    NDISC_NEIGHBOUR_ADVERTISEMENT ||
+				     hdr->icmp6_type ==
+				     NDISC_NEIGHBOUR_SOLICITATION) {
+					alb_change_nd_option(skb, tx_slave->dev->dev_addr);
+				}
+			}
+		}
+	}
+}
+
 /************************ exported alb functions ************************/
 
 int bond_alb_initialize(struct bonding *bond, int rlb_enabled)
@@ -1415,6 +1522,7 @@ struct slave *bond_xmit_alb_slave_get(struct bonding *bond,
 	}
 	case ETH_P_IPV6: {
 		const struct ipv6hdr *ip6hdr;
+		struct icmp6hdr *hdr = NULL;
 
 		/* IPv6 doesn't really use broadcast mac address, but leave
 		 * that here just in case.
@@ -1446,6 +1554,24 @@ struct slave *bond_xmit_alb_slave_get(struct bonding *bond,
 			break;
 		}
 
+		if (ip6hdr->nexthdr == IPPROTO_ICMPV6) {
+			hdr = icmp6_hdr(skb);
+			if (hdr->icmp6_type ==
+			    NDISC_NEIGHBOUR_ADVERTISEMENT ||
+			    hdr->icmp6_type ==
+			    NDISC_NEIGHBOUR_SOLICITATION) {
+				u8 *lladdr = NULL;
+
+				lladdr = alb_get_lladdr(skb);
+				if (lladdr) {
+					if (!bond_slave_has_mac_rx(bond, lladdr)) {
+						do_tx_balance = false;
+						break;
+					}
+				}
+			}
+		}
+
 		hash_start = (char *)&ip6hdr->daddr;
 		hash_size = sizeof(ip6hdr->daddr);
 		break;
@@ -1489,6 +1615,7 @@ netdev_tx_t bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev)
 	struct slave *tx_slave = NULL;
 
 	tx_slave = bond_xmit_alb_slave_get(bond, skb);
+	alb_set_nd_option(skb, bond, tx_slave);
 	return bond_do_alb_xmit(skb, bond, tx_slave);
 }
 
-- 
1.8.3.1


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH] net: bonding: Add support for IPV6 ns/na
  2021-12-09  6:50 [PATCH] net: bonding: Add support for IPV6 ns/na Sun Shouxin
@ 2021-12-09 11:57   ` kernel test robot
  2021-12-09 20:22 ` Jay Vosburgh
  1 sibling, 0 replies; 5+ messages in thread
From: kernel test robot @ 2021-12-09 11:57 UTC (permalink / raw)
  To: Sun Shouxin, j.vosburgh, vfalico, andy, davem, kuba
  Cc: llvm, kbuild-all, netdev, linux-kernel, huyd12

Hi Sun,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on linus/master]
[also build test ERROR on v5.16-rc4 next-20211208]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Sun-Shouxin/net-bonding-Add-support-for-IPV6-ns-na/20211209-150108
base:   https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git 2a987e65025e2b79c6d453b78cb5985ac6e5eb26
config: riscv-randconfig-c006-20211209 (https://download.01.org/0day-ci/archive/20211209/202112091907.6iLel0c9-lkp@intel.com/config)
compiler: clang version 14.0.0 (https://github.com/llvm/llvm-project 097a1cb1d5ebb3a0ec4bcaed8ba3ff6a8e33c00a)
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # install riscv cross compiling tool for clang build
        # apt-get install binutils-riscv64-linux-gnu
        # https://github.com/0day-ci/linux/commit/ab724c314fcdcaa60e70c590850b2ce57430d7fa
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Sun-Shouxin/net-bonding-Add-support-for-IPV6-ns-na/20211209-150108
        git checkout ab724c314fcdcaa60e70c590850b2ce57430d7fa
        # save the config file to linux build tree
        mkdir build_dir
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=riscv SHELL=/bin/bash drivers/net/bonding/

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

>> drivers/net/bonding/bond_alb.c:1307:26: error: implicit declaration of function 'csum_ipv6_magic' [-Werror,-Wimplicit-function-declaration]
                           icmp6h->icmp6_cksum = csum_ipv6_magic(&ip6hdr->saddr,
                                                 ^
   drivers/net/bonding/bond_alb.c:1307:26: note: did you mean 'csum_tcpudp_magic'?
   include/asm-generic/checksum.h:52:1: note: 'csum_tcpudp_magic' declared here
   csum_tcpudp_magic(__be32 saddr, __be32 daddr, __u32 len,
   ^
   1 error generated.


vim +/csum_ipv6_magic +1307 drivers/net/bonding/bond_alb.c

  1272	
  1273	static void alb_change_nd_option(struct sk_buff *skb, void *data)
  1274	{
  1275		struct nd_msg *msg = (struct nd_msg *)skb_transport_header(skb);
  1276		struct nd_opt_hdr *nd_opt = (struct nd_opt_hdr *)msg->opt;
  1277		struct net_device *dev = skb->dev;
  1278		struct icmp6hdr *icmp6h = icmp6_hdr(skb);
  1279		struct ipv6hdr *ip6hdr = ipv6_hdr(skb);
  1280		u8 *lladdr = NULL;
  1281		u32 ndoptlen = skb_tail_pointer(skb) - (skb_transport_header(skb) +
  1282					offsetof(struct nd_msg, opt));
  1283	
  1284		while (ndoptlen) {
  1285			int l;
  1286	
  1287			switch (nd_opt->nd_opt_type) {
  1288			case ND_OPT_SOURCE_LL_ADDR:
  1289			case ND_OPT_TARGET_LL_ADDR:
  1290			lladdr = ndisc_opt_addr_data(nd_opt, dev);
  1291			break;
  1292	
  1293			default:
  1294			break;
  1295			}
  1296	
  1297			l = nd_opt->nd_opt_len << 3;
  1298	
  1299			if (ndoptlen < l || l == 0)
  1300				return;
  1301	
  1302			if (lladdr) {
  1303				memcpy(lladdr, data, dev->addr_len);
  1304				lladdr = NULL;
  1305				icmp6h->icmp6_cksum = 0;
  1306	
> 1307				icmp6h->icmp6_cksum = csum_ipv6_magic(&ip6hdr->saddr,
  1308								      &ip6hdr->daddr,
  1309							ntohs(ip6hdr->payload_len),
  1310							IPPROTO_ICMPV6,
  1311							csum_partial(icmp6h,
  1312								     ntohs(ip6hdr->payload_len), 0));
  1313				lladdr = NULL;
  1314			}
  1315			ndoptlen -= l;
  1316			nd_opt = ((void *)nd_opt) + l;
  1317		}
  1318	}
  1319	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] net: bonding: Add support for IPV6 ns/na
@ 2021-12-09 11:57   ` kernel test robot
  0 siblings, 0 replies; 5+ messages in thread
From: kernel test robot @ 2021-12-09 11:57 UTC (permalink / raw)
  To: kbuild-all

[-- Attachment #1: Type: text/plain, Size: 4035 bytes --]

Hi Sun,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on linus/master]
[also build test ERROR on v5.16-rc4 next-20211208]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Sun-Shouxin/net-bonding-Add-support-for-IPV6-ns-na/20211209-150108
base:   https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git 2a987e65025e2b79c6d453b78cb5985ac6e5eb26
config: riscv-randconfig-c006-20211209 (https://download.01.org/0day-ci/archive/20211209/202112091907.6iLel0c9-lkp(a)intel.com/config)
compiler: clang version 14.0.0 (https://github.com/llvm/llvm-project 097a1cb1d5ebb3a0ec4bcaed8ba3ff6a8e33c00a)
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # install riscv cross compiling tool for clang build
        # apt-get install binutils-riscv64-linux-gnu
        # https://github.com/0day-ci/linux/commit/ab724c314fcdcaa60e70c590850b2ce57430d7fa
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Sun-Shouxin/net-bonding-Add-support-for-IPV6-ns-na/20211209-150108
        git checkout ab724c314fcdcaa60e70c590850b2ce57430d7fa
        # save the config file to linux build tree
        mkdir build_dir
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=riscv SHELL=/bin/bash drivers/net/bonding/

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

>> drivers/net/bonding/bond_alb.c:1307:26: error: implicit declaration of function 'csum_ipv6_magic' [-Werror,-Wimplicit-function-declaration]
                           icmp6h->icmp6_cksum = csum_ipv6_magic(&ip6hdr->saddr,
                                                 ^
   drivers/net/bonding/bond_alb.c:1307:26: note: did you mean 'csum_tcpudp_magic'?
   include/asm-generic/checksum.h:52:1: note: 'csum_tcpudp_magic' declared here
   csum_tcpudp_magic(__be32 saddr, __be32 daddr, __u32 len,
   ^
   1 error generated.


vim +/csum_ipv6_magic +1307 drivers/net/bonding/bond_alb.c

  1272	
  1273	static void alb_change_nd_option(struct sk_buff *skb, void *data)
  1274	{
  1275		struct nd_msg *msg = (struct nd_msg *)skb_transport_header(skb);
  1276		struct nd_opt_hdr *nd_opt = (struct nd_opt_hdr *)msg->opt;
  1277		struct net_device *dev = skb->dev;
  1278		struct icmp6hdr *icmp6h = icmp6_hdr(skb);
  1279		struct ipv6hdr *ip6hdr = ipv6_hdr(skb);
  1280		u8 *lladdr = NULL;
  1281		u32 ndoptlen = skb_tail_pointer(skb) - (skb_transport_header(skb) +
  1282					offsetof(struct nd_msg, opt));
  1283	
  1284		while (ndoptlen) {
  1285			int l;
  1286	
  1287			switch (nd_opt->nd_opt_type) {
  1288			case ND_OPT_SOURCE_LL_ADDR:
  1289			case ND_OPT_TARGET_LL_ADDR:
  1290			lladdr = ndisc_opt_addr_data(nd_opt, dev);
  1291			break;
  1292	
  1293			default:
  1294			break;
  1295			}
  1296	
  1297			l = nd_opt->nd_opt_len << 3;
  1298	
  1299			if (ndoptlen < l || l == 0)
  1300				return;
  1301	
  1302			if (lladdr) {
  1303				memcpy(lladdr, data, dev->addr_len);
  1304				lladdr = NULL;
  1305				icmp6h->icmp6_cksum = 0;
  1306	
> 1307				icmp6h->icmp6_cksum = csum_ipv6_magic(&ip6hdr->saddr,
  1308								      &ip6hdr->daddr,
  1309							ntohs(ip6hdr->payload_len),
  1310							IPPROTO_ICMPV6,
  1311							csum_partial(icmp6h,
  1312								     ntohs(ip6hdr->payload_len), 0));
  1313				lladdr = NULL;
  1314			}
  1315			ndoptlen -= l;
  1316			nd_opt = ((void *)nd_opt) + l;
  1317		}
  1318	}
  1319	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all(a)lists.01.org

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] net: bonding: Add support for IPV6 ns/na
  2021-12-09  6:50 [PATCH] net: bonding: Add support for IPV6 ns/na Sun Shouxin
  2021-12-09 11:57   ` kernel test robot
@ 2021-12-09 20:22 ` Jay Vosburgh
  2021-12-10  7:09   ` 孙守鑫
  1 sibling, 1 reply; 5+ messages in thread
From: Jay Vosburgh @ 2021-12-09 20:22 UTC (permalink / raw)
  To: Sun Shouxin; +Cc: vfalico, andy, davem, kuba, netdev, linux-kernel, huyd12

Sun Shouxin <sunshouxin@chinatelecom.cn> wrote:

>Since ipv6 neighbor solicitation and advertisement messages
>isn't handled gracefully in bonding6 driver, we can see packet
>drop due to inconsistency bewteen mac address in the option
>message and source MAC .

	Could you provide a specific example where this occurs?

>Another examples is ipv6 neighbor solicitation and advertisement
>messages from VM via tap attached to host brighe, the src mac
>mighe be changed through balance-alb mode, but it is not synced
>with Link-layer address in the option message.

	What happens if the MAC assignment changes because alb does a
rebalance?

>The patch implements bond6's tx handle for ipv6 neighbor
>solicitation and advertisement messages.

	A few additional minor comments below.

>Suggested-by: Hu Yadi <huyd12@chinatelecom.cn>
>Signed-off-by: Sun Shouxin <sunshouxin@chinatelecom.cn>
>---
> drivers/net/bonding/bond_alb.c | 127 +++++++++++++++++++++++++++++++++++++++++
> 1 file changed, 127 insertions(+)
>
>diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c
>index 2ec8e01..01566ba 100644
>--- a/drivers/net/bonding/bond_alb.c
>+++ b/drivers/net/bonding/bond_alb.c
>@@ -22,6 +22,7 @@
> #include <asm/byteorder.h>
> #include <net/bonding.h>
> #include <net/bond_alb.h>
>+#include <net/ndisc.h>
> 
> static const u8 mac_v6_allmcast[ETH_ALEN + 2] __long_aligned = {
> 	0x33, 0x33, 0x00, 0x00, 0x00, 0x01
>@@ -1269,6 +1270,112 @@ static int alb_set_mac_address(struct bonding *bond, void *addr)
> 	return res;
> }
> 
>+static void alb_change_nd_option(struct sk_buff *skb, void *data)
>+{
>+	struct nd_msg *msg = (struct nd_msg *)skb_transport_header(skb);
>+	struct nd_opt_hdr *nd_opt = (struct nd_opt_hdr *)msg->opt;
>+	struct net_device *dev = skb->dev;
>+	struct icmp6hdr *icmp6h = icmp6_hdr(skb);
>+	struct ipv6hdr *ip6hdr = ipv6_hdr(skb);
>+	u8 *lladdr = NULL;
>+	u32 ndoptlen = skb_tail_pointer(skb) - (skb_transport_header(skb) +
>+				offsetof(struct nd_msg, opt));
>+
>+	while (ndoptlen) {
>+		int l;
>+
>+		switch (nd_opt->nd_opt_type) {
>+		case ND_OPT_SOURCE_LL_ADDR:
>+		case ND_OPT_TARGET_LL_ADDR:
>+		lladdr = ndisc_opt_addr_data(nd_opt, dev);
>+		break;
>+
>+		default:
>+		break;
>+		}
>+
>+		l = nd_opt->nd_opt_len << 3;
>+
>+		if (ndoptlen < l || l == 0)
>+			return;
>+
>+		if (lladdr) {
>+			memcpy(lladdr, data, dev->addr_len);
>+			lladdr = NULL;
>+			icmp6h->icmp6_cksum = 0;
>+
>+			icmp6h->icmp6_cksum = csum_ipv6_magic(&ip6hdr->saddr,
>+							      &ip6hdr->daddr,
>+						ntohs(ip6hdr->payload_len),
>+						IPPROTO_ICMPV6,
>+						csum_partial(icmp6h,
>+							     ntohs(ip6hdr->payload_len), 0));
>+			lladdr = NULL;

	"lladdr = NULL" could be in the default: case, above, instead of
being done here (and it's here twice).

>+		}
>+		ndoptlen -= l;
>+		nd_opt = ((void *)nd_opt) + l;
>+	}
>+}
>+
>+static u8 *alb_get_lladdr(struct sk_buff *skb)
>+{
>+	struct nd_msg *msg = (struct nd_msg *)skb_transport_header(skb);
>+	struct nd_opt_hdr *nd_opt = (struct nd_opt_hdr *)msg->opt;
>+	struct net_device *dev = skb->dev;
>+	u8 *lladdr = NULL;
>+	u32 ndoptlen = skb_tail_pointer(skb) - (skb_transport_header(skb) +
>+				offsetof(struct nd_msg, opt));
>+
>+	while (ndoptlen) {
>+		int l;
>+
>+		switch (nd_opt->nd_opt_type) {
>+		case ND_OPT_SOURCE_LL_ADDR:
>+		case ND_OPT_TARGET_LL_ADDR:
>+			lladdr = ndisc_opt_addr_data(nd_opt, dev);
>+			break;
>+
>+		default:
>+			break;
>+		}
>+
>+		l = nd_opt->nd_opt_len << 3;
>+
>+		if (ndoptlen < l || l == 0)
>+			return lladdr;
>+
>+		if (lladdr)
>+			return lladdr;
>+
>+		ndoptlen -= l;
>+		nd_opt = ((void *)nd_opt) + l;
>+	}
>+
>+	return lladdr;
>+}
>+
>+static void alb_set_nd_option(struct sk_buff *skb, struct bonding *bond,
>+			      struct slave *tx_slave)
>+{
>+	struct ipv6hdr *ip6hdr;
>+	struct icmp6hdr *hdr = NULL;
>+
>+	if (tx_slave && tx_slave != rcu_access_pointer(bond->curr_active_slave)) {
>+		if (ntohs(skb->protocol) == ETH_P_IPV6) {

	Nit: use "skb->protocol == htons(ETH_P_IPV6)" as the compiler
should optimize the htons() of a constant.  Also, you may want to
consider reordering the tests here, as IPv6 NA/NS traffic is likely to
be the vast minority.

>+			ip6hdr = ipv6_hdr(skb);
>+			if (ip6hdr->nexthdr == IPPROTO_ICMPV6) {
>+				hdr = icmp6_hdr(skb);
>+				if (hdr->icmp6_type ==
>+				    NDISC_NEIGHBOUR_ADVERTISEMENT ||
>+				     hdr->icmp6_type ==
>+				     NDISC_NEIGHBOUR_SOLICITATION) {

	This construct appears twice, perhaps it deserves its own
boolean-return function?

	-J

>+					alb_change_nd_option(skb, tx_slave->dev->dev_addr);
>+				}
>+			}
>+		}
>+	}
>+}
>+
> /************************ exported alb functions ************************/
> 
> int bond_alb_initialize(struct bonding *bond, int rlb_enabled)
>@@ -1415,6 +1522,7 @@ struct slave *bond_xmit_alb_slave_get(struct bonding *bond,
> 	}
> 	case ETH_P_IPV6: {
> 		const struct ipv6hdr *ip6hdr;
>+		struct icmp6hdr *hdr = NULL;
> 
> 		/* IPv6 doesn't really use broadcast mac address, but leave
> 		 * that here just in case.
>@@ -1446,6 +1554,24 @@ struct slave *bond_xmit_alb_slave_get(struct bonding *bond,
> 			break;
> 		}
> 
>+		if (ip6hdr->nexthdr == IPPROTO_ICMPV6) {
>+			hdr = icmp6_hdr(skb);
>+			if (hdr->icmp6_type ==
>+			    NDISC_NEIGHBOUR_ADVERTISEMENT ||
>+			    hdr->icmp6_type ==
>+			    NDISC_NEIGHBOUR_SOLICITATION) {
>+				u8 *lladdr = NULL;
>+
>+				lladdr = alb_get_lladdr(skb);
>+				if (lladdr) {
>+					if (!bond_slave_has_mac_rx(bond, lladdr)) {
>+						do_tx_balance = false;
>+						break;
>+					}
>+				}
>+			}
>+		}
>+
> 		hash_start = (char *)&ip6hdr->daddr;
> 		hash_size = sizeof(ip6hdr->daddr);
> 		break;
>@@ -1489,6 +1615,7 @@ netdev_tx_t bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev)
> 	struct slave *tx_slave = NULL;
> 
> 	tx_slave = bond_xmit_alb_slave_get(bond, skb);
>+	alb_set_nd_option(skb, bond, tx_slave);
> 	return bond_do_alb_xmit(skb, bond, tx_slave);
> }
> 
>-- 
>1.8.3.1
>

---
	-Jay Vosburgh, jay.vosburgh@canonical.com

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] net: bonding: Add support for IPV6 ns/na
  2021-12-09 20:22 ` Jay Vosburgh
@ 2021-12-10  7:09   ` 孙守鑫
  0 siblings, 0 replies; 5+ messages in thread
From: 孙守鑫 @ 2021-12-10  7:09 UTC (permalink / raw)
  To: Jay Vosburgh; +Cc: vfalico, andy, davem, kuba, netdev, linux-kernel, huyd12


在 2021/12/10 4:22, Jay Vosburgh 写道:
> Sun Shouxin <sunshouxin@chinatelecom.cn> wrote:
>
>> Since ipv6 neighbor solicitation and advertisement messages
>> isn't handled gracefully in bonding6 driver, we can see packet
>> drop due to inconsistency bewteen mac address in the option
>> message and source MAC .
> 	Could you provide a specific example where this occurs?

                       Border-Leaf
                       /        \
                      /          \
                  Tunnel1    Tunnel2
                    /              \
                   /                \
                 Leaf-1--Tunnel3--Leaf-2
                   \                /
                    \              /
                     \            /
                      \          /
                     NIC1    NIC2
                        \      /
                         server

In our lab we can see that the Border-Leaf occasionally receives an NA packet
whose ND/NS option message carries the NIC1 MAC, but which is actually sent
out via the NIC2 MAC due to tx-alb.
As a result, it causes an inconsistency between the MAC table and the ND table
in the Border-Leaf, i.e., NIC1 = Tunnel2 in the ND table and NIC1 = Tunnel1 in
the MAC table.

Then, when the Border-Leaf starts to forward a packet destined for the server,
on some switches it will only check the ND table entry, encapsulate the
destination MAC of the message as the NIC1 MAC,
and send it out from Tunnel2 according to the ND table.
When Leaf-2 receives the packet, it notices that the destination MAC of the
message is the NIC1 MAC and that it should forward it to Tunnel1 via Tunnel3.
However, this forwarding will fail due to the split horizon of the
VxLAN tunnels.

>
>> Another examples is ipv6 neighbor solicitation and advertisement
>> messages from VM via tap attached to host brighe, the src mac
>> mighe be changed through balance-alb mode, but it is not synced
>> with Link-layer address in the option message.
> 	What happens if the MAC assignment changes because alb does a
> rebalance?
The same result occurs as above; it is just specific to the virtualization
context.
In this case, the src mac of the packet sent out by the host is from the VM's
tap and not from NIC1/NIC2 in the host.
>
>> The patch implements bond6's tx handle for ipv6 neighbor
>> solicitation and advertisement messages.
> 	A few additional minor comments below.
Thanks for your comments, I'll adjust it and send out V2 soon.
>
>> Suggested-by: Hu Yadi <huyd12@chinatelecom.cn>
>> Signed-off-by: Sun Shouxin <sunshouxin@chinatelecom.cn>
>> ---
>> drivers/net/bonding/bond_alb.c | 127 +++++++++++++++++++++++++++++++++++++++++
>> 1 file changed, 127 insertions(+)
>>
>> diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c
>> index 2ec8e01..01566ba 100644
>> --- a/drivers/net/bonding/bond_alb.c
>> +++ b/drivers/net/bonding/bond_alb.c
>> @@ -22,6 +22,7 @@
>> #include <asm/byteorder.h>
>> #include <net/bonding.h>
>> #include <net/bond_alb.h>
>> +#include <net/ndisc.h>
>>
>> static const u8 mac_v6_allmcast[ETH_ALEN + 2] __long_aligned = {
>> 	0x33, 0x33, 0x00, 0x00, 0x00, 0x01
>> @@ -1269,6 +1270,112 @@ static int alb_set_mac_address(struct bonding *bond, void *addr)
>> 	return res;
>> }
>>
>> +static void alb_change_nd_option(struct sk_buff *skb, void *data)
>> +{
>> +	struct nd_msg *msg = (struct nd_msg *)skb_transport_header(skb);
>> +	struct nd_opt_hdr *nd_opt = (struct nd_opt_hdr *)msg->opt;
>> +	struct net_device *dev = skb->dev;
>> +	struct icmp6hdr *icmp6h = icmp6_hdr(skb);
>> +	struct ipv6hdr *ip6hdr = ipv6_hdr(skb);
>> +	u8 *lladdr = NULL;
>> +	u32 ndoptlen = skb_tail_pointer(skb) - (skb_transport_header(skb) +
>> +				offsetof(struct nd_msg, opt));
>> +
>> +	while (ndoptlen) {
>> +		int l;
>> +
>> +		switch (nd_opt->nd_opt_type) {
>> +		case ND_OPT_SOURCE_LL_ADDR:
>> +		case ND_OPT_TARGET_LL_ADDR:
>> +		lladdr = ndisc_opt_addr_data(nd_opt, dev);
>> +		break;
>> +
>> +		default:
>> +		break;
>> +		}
>> +
>> +		l = nd_opt->nd_opt_len << 3;
>> +
>> +		if (ndoptlen < l || l == 0)
>> +			return;
>> +
>> +		if (lladdr) {
>> +			memcpy(lladdr, data, dev->addr_len);
>> +			lladdr = NULL;
>> +			icmp6h->icmp6_cksum = 0;
>> +
>> +			icmp6h->icmp6_cksum = csum_ipv6_magic(&ip6hdr->saddr,
>> +							      &ip6hdr->daddr,
>> +						ntohs(ip6hdr->payload_len),
>> +						IPPROTO_ICMPV6,
>> +						csum_partial(icmp6h,
>> +							     ntohs(ip6hdr->payload_len), 0));
>> +			lladdr = NULL;
> 	"lladdr = NULL" could be in the default: case, above, instead of
> being done here (and it's here twice).
>
>> +		}
>> +		ndoptlen -= l;
>> +		nd_opt = ((void *)nd_opt) + l;
>> +	}
>> +}
>> +
>> +static u8 *alb_get_lladdr(struct sk_buff *skb)
>> +{
>> +	struct nd_msg *msg = (struct nd_msg *)skb_transport_header(skb);
>> +	struct nd_opt_hdr *nd_opt = (struct nd_opt_hdr *)msg->opt;
>> +	struct net_device *dev = skb->dev;
>> +	u8 *lladdr = NULL;
>> +	u32 ndoptlen = skb_tail_pointer(skb) - (skb_transport_header(skb) +
>> +				offsetof(struct nd_msg, opt));
>> +
>> +	while (ndoptlen) {
>> +		int l;
>> +
>> +		switch (nd_opt->nd_opt_type) {
>> +		case ND_OPT_SOURCE_LL_ADDR:
>> +		case ND_OPT_TARGET_LL_ADDR:
>> +			lladdr = ndisc_opt_addr_data(nd_opt, dev);
>> +			break;
>> +
>> +		default:
>> +			break;
>> +		}
>> +
>> +		l = nd_opt->nd_opt_len << 3;
>> +
>> +		if (ndoptlen < l || l == 0)
>> +			return lladdr;
>> +
>> +		if (lladdr)
>> +			return lladdr;
>> +
>> +		ndoptlen -= l;
>> +		nd_opt = ((void *)nd_opt) + l;
>> +	}
>> +
>> +	return lladdr;
>> +}
>> +
>> +static void alb_set_nd_option(struct sk_buff *skb, struct bonding *bond,
>> +			      struct slave *tx_slave)
>> +{
>> +	struct ipv6hdr *ip6hdr;
>> +	struct icmp6hdr *hdr = NULL;
>> +
>> +	if (tx_slave && tx_slave != rcu_access_pointer(bond->curr_active_slave)) {
>> +		if (ntohs(skb->protocol) == ETH_P_IPV6) {
> 	Nit: use "skb->protocol == htons(ETH_P_IPV6)" as the compiler
> should optimize the htons() of a constant.  Also, you may want to
> consider reordering the tests here, as IPv6 NA/NS traffic is likely to
> be the vast minority.
>
>> +			ip6hdr = ipv6_hdr(skb);
>> +			if (ip6hdr->nexthdr == IPPROTO_ICMPV6) {
>> +				hdr = icmp6_hdr(skb);
>> +				if (hdr->icmp6_type ==
>> +				    NDISC_NEIGHBOUR_ADVERTISEMENT ||
>> +				     hdr->icmp6_type ==
>> +				     NDISC_NEIGHBOUR_SOLICITATION) {
> 	This construct appears twice, perhaps it deserves its own
> boolean-return function?
>
> 	-J
>
>> +					alb_change_nd_option(skb, tx_slave->dev->dev_addr);
>> +				}
>> +			}
>> +		}
>> +	}
>> +}
>> +
>> /************************ exported alb functions ************************/
>>
>> int bond_alb_initialize(struct bonding *bond, int rlb_enabled)
>> @@ -1415,6 +1522,7 @@ struct slave *bond_xmit_alb_slave_get(struct bonding *bond,
>> 	}
>> 	case ETH_P_IPV6: {
>> 		const struct ipv6hdr *ip6hdr;
>> +		struct icmp6hdr *hdr = NULL;
>>
>> 		/* IPv6 doesn't really use broadcast mac address, but leave
>> 		 * that here just in case.
>> @@ -1446,6 +1554,24 @@ struct slave *bond_xmit_alb_slave_get(struct bonding *bond,
>> 			break;
>> 		}
>>
>> +		if (ip6hdr->nexthdr == IPPROTO_ICMPV6) {
>> +			hdr = icmp6_hdr(skb);
>> +			if (hdr->icmp6_type ==
>> +			    NDISC_NEIGHBOUR_ADVERTISEMENT ||
>> +			    hdr->icmp6_type ==
>> +			    NDISC_NEIGHBOUR_SOLICITATION) {
>> +				u8 *lladdr = NULL;
>> +
>> +				lladdr = alb_get_lladdr(skb);
>> +				if (lladdr) {
>> +					if (!bond_slave_has_mac_rx(bond, lladdr)) {
>> +						do_tx_balance = false;
>> +						break;
>> +					}
>> +				}
>> +			}
>> +		}
>> +
>> 		hash_start = (char *)&ip6hdr->daddr;
>> 		hash_size = sizeof(ip6hdr->daddr);
>> 		break;
>> @@ -1489,6 +1615,7 @@ netdev_tx_t bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev)
>> 	struct slave *tx_slave = NULL;
>>
>> 	tx_slave = bond_xmit_alb_slave_get(bond, skb);
>> +	alb_set_nd_option(skb, bond, tx_slave);
>> 	return bond_do_alb_xmit(skb, bond, tx_slave);
>> }
>>
>> -- 
>> 1.8.3.1
>>
> ---
> 	-Jay Vosburgh, jay.vosburgh@canonical.com

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2021-12-10  7:10 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-12-09  6:50 [PATCH] net: bonding: Add support for IPV6 ns/na Sun Shouxin
2021-12-09 11:57 ` kernel test robot
2021-12-09 11:57   ` kernel test robot
2021-12-09 20:22 ` Jay Vosburgh
2021-12-10  7:09   ` 孙守鑫

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.