All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v4] net:bonding:Add support for IPV6 RLB to balance-alb mode
@ 2022-03-17  6:15 Sun Shouxin
  2022-03-17  8:11 ` Jiri Pirko
  2022-03-17 18:49 ` David Ahern
  0 siblings, 2 replies; 10+ messages in thread
From: Sun Shouxin @ 2022-03-17  6:15 UTC (permalink / raw)
  To: j.vosburgh, vfalico, andy, davem, kuba, yoshfuji, dsahern, oliver
  Cc: netdev, linux-kernel, huyd12, sunshouxin

This patch implements IPv6 RLB (receive load balancing) for balance-alb mode.

Suggested-by: Hu Yadi <huyd12@chinatelecom.cn>
Signed-off-by: Sun Shouxin <sunshouxin@chinatelecom.cn>
---
changelog:
v1-->v2:
-Remove ndisc_bond_send_na and refactor ndisc_send_na.
-In rlb_nd_xmit, if the lladdr is not local, return curr_active_slave.
-Don't send a neighbor advertisement in response to a received
 neighbor advertisement in rlb6_update_entry_from_na.

v2-->v3:
-Don't export ndisc_send_na.
-Use ipv6_stub->ndisc_send_na to replace ndisc_send_na
 in rlb6_update_client.

v3-->v4:
-Submit all code as a single patch.
---
 drivers/net/bonding/bond_3ad.c     |   2 +-
 drivers/net/bonding/bond_alb.c     | 592 ++++++++++++++++++++++++++++-
 drivers/net/bonding/bond_debugfs.c |  14 +
 drivers/net/bonding/bond_main.c    |   6 +-
 drivers/net/usb/cdc_mbim.c         |   2 +-
 include/net/bond_3ad.h             |   2 +-
 include/net/bond_alb.h             |   7 +
 include/net/bonding.h              |   6 +-
 include/net/ipv6_stubs.h           |   3 +-
 include/net/ndisc.h                |   9 +-
 net/ipv6/addrconf.c                |   4 +-
 net/ipv6/ndisc.c                   |  64 +++-
 12 files changed, 675 insertions(+), 36 deletions(-)

diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c
index a86b1f71762e..3cba269f12e2 100644
--- a/drivers/net/bonding/bond_3ad.c
+++ b/drivers/net/bonding/bond_3ad.c
@@ -2682,7 +2682,7 @@ int bond_3ad_get_active_agg_info(struct bonding *bond, struct ad_info *ad_info)
 	return ret;
 }
 
-int bond_3ad_lacpdu_recv(const struct sk_buff *skb, struct bonding *bond,
+int bond_3ad_lacpdu_recv(struct sk_buff *skb, struct bonding *bond,
 			 struct slave *slave)
 {
 	struct lacpdu *lacpdu, _lacpdu;
diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c
index 303c8d32d451..06a4557e00e3 100644
--- a/drivers/net/bonding/bond_alb.c
+++ b/drivers/net/bonding/bond_alb.c
@@ -23,6 +23,9 @@
 #include <asm/byteorder.h>
 #include <net/bonding.h>
 #include <net/bond_alb.h>
+#include <net/addrconf.h>
+#include <net/ip6_checksum.h>
+#include <net/ipv6_stubs.h>
 
 static const u8 mac_v6_allmcast[ETH_ALEN + 2] __long_aligned = {
 	0x33, 0x33, 0x00, 0x00, 0x00, 0x01
@@ -57,6 +60,13 @@ static void rlb_purge_src_ip(struct bonding *bond, struct arp_pkt *arp);
 static void rlb_src_unlink(struct bonding *bond, u32 index);
 static void rlb_src_link(struct bonding *bond, u32 ip_src_hash,
 			 u32 ip_dst_hash);
+static void rlb6_delete_table_entry(struct bonding *bond, u32 index);
+static u8 *alb_get_lladdr(struct sk_buff *skb);
+static void alb_set_nd_option(struct sk_buff *skb, struct bonding *bond,
+			      struct slave *tx_slave);
+static bool alb_determine_ipv6_nd(struct sk_buff *skb, struct bonding *bond);
+static int rlb_recv(struct sk_buff *skb, struct bonding *bond,
+		    struct slave *slave);
 
 static inline u8 _simple_hash(const u8 *hash_start, int hash_size)
 {
@@ -269,7 +279,7 @@ static void rlb_update_entry_from_arp(struct bonding *bond, struct arp_pkt *arp)
 	spin_unlock_bh(&bond->mode_lock);
 }
 
-static int rlb_arp_recv(const struct sk_buff *skb, struct bonding *bond,
+static int rlb_arp_recv(struct sk_buff *skb, struct bonding *bond,
 			struct slave *slave)
 {
 	struct arp_pkt *arp, _arp;
@@ -415,6 +425,31 @@ static void rlb_clear_slave(struct bonding *bond, struct slave *slave)
 		}
 	}
 
+	rx_hash_table = bond_info->rx6_hashtbl;
+	index = bond_info->rx6_hashtbl_used_head;
+	for (; index != RLB_NULL_INDEX; index = next_index) {
+		next_index = rx_hash_table[index].used_next;
+		if (rx_hash_table[index].slave == slave) {
+			struct slave *assigned_slave = rlb_next_rx_slave(bond);
+
+			if (assigned_slave) {
+				u8 mac_dst[ETH_ALEN];
+
+				rx_hash_table[index].slave = assigned_slave;
+				memcpy(mac_dst, rx_hash_table[index].mac_dst,
+				       sizeof(mac_dst));
+				if (is_valid_ether_addr(mac_dst)) {
+					bond_info->rx6_hashtbl[index].ntt = 1;
+					bond_info->rx6_ntt = 1;
+					bond_info->rlb6_update_retry_counter =
+						RLB_UPDATE_RETRY;
+				}
+			} else {  /* there is no active slave */
+				rx_hash_table[index].slave = NULL;
+			}
+		}
+	}
+
 	spin_unlock_bh(&bond->mode_lock);
 
 	if (slave != rtnl_dereference(bond->curr_active_slave))
@@ -704,7 +739,7 @@ static void rlb_rebalance(struct bonding *bond)
 	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
 	struct slave *assigned_slave;
 	struct rlb_client_info *client_info;
-	int ntt;
+	int ntt, ntt_ip6;
 	u32 hash_index;
 
 	spin_lock_bh(&bond->mode_lock);
@@ -724,9 +759,27 @@ static void rlb_rebalance(struct bonding *bond)
 		}
 	}
 
+	ntt_ip6 = 0;
+	hash_index = bond_info->rx6_hashtbl_used_head;
+	for (; hash_index != RLB_NULL_INDEX;
+		 hash_index = client_info->used_next) {
+		client_info = &bond_info->rx6_hashtbl[hash_index];
+		assigned_slave = __rlb_next_rx_slave(bond);
+		if (assigned_slave && client_info->slave != assigned_slave) {
+			client_info->slave = assigned_slave;
+			if (!is_zero_ether_addr(client_info->mac_dst)) {
+				client_info->ntt = 1;
+				ntt_ip6 = 1;
+			}
+		}
+	}
+
 	/* update the team's flag only after the whole iteration */
 	if (ntt)
 		bond_info->rx_ntt = 1;
+
+	if (ntt_ip6)
+		bond_info->rx6_ntt = 1;
 	spin_unlock_bh(&bond->mode_lock);
 }
 
@@ -846,6 +899,7 @@ static int rlb_initialize(struct bonding *bond)
 {
 	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
 	struct rlb_client_info	*new_hashtbl;
+	struct rlb_client_info	*new6_hashtbl;
 	int size = RLB_HASH_TABLE_SIZE * sizeof(struct rlb_client_info);
 	int i;
 
@@ -853,19 +907,29 @@ static int rlb_initialize(struct bonding *bond)
 	if (!new_hashtbl)
 		return -1;
 
+	new6_hashtbl = kmalloc(size, GFP_KERNEL);
+	if (!new6_hashtbl) {
+		kfree(new_hashtbl);
+		return -1;
+	}
+
 	spin_lock_bh(&bond->mode_lock);
 
 	bond_info->rx_hashtbl = new_hashtbl;
+	bond_info->rx6_hashtbl = new6_hashtbl;
 
 	bond_info->rx_hashtbl_used_head = RLB_NULL_INDEX;
+	bond_info->rx6_hashtbl_used_head = RLB_NULL_INDEX;
 
-	for (i = 0; i < RLB_HASH_TABLE_SIZE; i++)
+	for (i = 0; i < RLB_HASH_TABLE_SIZE; i++) {
 		rlb_init_table_entry(bond_info->rx_hashtbl + i);
+		rlb_init_table_entry(bond_info->rx6_hashtbl + i);
+	}
 
 	spin_unlock_bh(&bond->mode_lock);
 
 	/* register to receive ARPs */
-	bond->recv_probe = rlb_arp_recv;
+	bond->recv_probe = rlb_recv;
 
 	return 0;
 }
@@ -880,6 +944,10 @@ static void rlb_deinitialize(struct bonding *bond)
 	bond_info->rx_hashtbl = NULL;
 	bond_info->rx_hashtbl_used_head = RLB_NULL_INDEX;
 
+	kfree(bond_info->rx6_hashtbl);
+	bond_info->rx6_hashtbl = NULL;
+	bond_info->rx6_hashtbl_used_head = RLB_NULL_INDEX;
+
 	spin_unlock_bh(&bond->mode_lock);
 }
 
@@ -901,9 +969,397 @@ static void rlb_clear_vlan(struct bonding *bond, unsigned short vlan_id)
 		curr_index = next_index;
 	}
 
+	curr_index = bond_info->rx6_hashtbl_used_head;
+	while (curr_index != RLB_NULL_INDEX) {
+		struct rlb_client_info *curr = &bond_info->rx6_hashtbl[curr_index];
+		u32 next_index = bond_info->rx6_hashtbl[curr_index].used_next;
+
+		if (curr->vlan_id == vlan_id)
+			rlb6_delete_table_entry(bond, curr_index);
+
+		curr_index = next_index;
+	}
+
+	spin_unlock_bh(&bond->mode_lock);
+}
+
+/*********************** ipv6 rlb specific functions ***************************/
+/* Advertise one IPv6 RLB entry to its client.
+ *
+ * Does nothing unless the entry has a slave and a valid client MAC.
+ * Otherwise sends RLB_ARP_BURST_SIZE neighbour advertisements through the
+ * entry's slave (router=false, solicited=false, override=true, inc_opt=true),
+ * addressed to ip6_dst with ip6_src as the solicited address, handing the
+ * VLAN id and both MAC addresses to ndisc_send_na() in a struct nd_sendinfo.
+ */
+static void rlb6_update_client(struct rlb_client_info *client_info)
+{
+	struct slave *rx_slave = client_info->slave;
+	struct nd_sendinfo info;
+	int burst = RLB_ARP_BURST_SIZE;
+
+	if (!rx_slave)
+		return;
+
+	if (!is_valid_ether_addr(client_info->mac_dst))
+		return;
+
+	info.mac_src = rx_slave->dev->dev_addr;
+	info.mac_dst = client_info->mac_dst;
+	info.vlanid = client_info->vlan_id;
+
+	while (burst-- > 0)
+		ipv6_stub->ndisc_send_na(rx_slave->dev,
+					 &client_info->ip6_dst,
+					 &client_info->ip6_src,
+					 false, false, true, true,
+					 &info);
+}
+
+/* Send updates for every IPv6 RLB entry flagged "need to transmit".
+ *
+ * Walks the used list of rx6_hashtbl under mode_lock. Each entry with ntt
+ * set is advertised via rlb6_update_client(); its ntt flag is cleared only
+ * once rlb6_update_retry_counter has reached zero. The update delay counter
+ * is re-armed to RLB_UPDATE_DELAY before the lock is dropped.
+ */
+static void rlb6_update_rx_clients(struct bonding *bond)
+{
+	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
+	u32 idx;
+
+	spin_lock_bh(&bond->mode_lock);
+
+	idx = bond_info->rx6_hashtbl_used_head;
+	while (idx != RLB_NULL_INDEX) {
+		struct rlb_client_info *entry = &bond_info->rx6_hashtbl[idx];
+
+		idx = entry->used_next;
+		if (!entry->ntt)
+			continue;
+
+		rlb6_update_client(entry);
+		if (!bond_info->rlb6_update_retry_counter)
+			entry->ntt = 0;
+	}
+
+	bond_info->rlb6_update_delay_counter = RLB_UPDATE_DELAY;
+
+	spin_unlock_bh(&bond->mode_lock);
+}
+
+/* Unhook entry @index from the used list of the IPv6 receive hash table.
+ *
+ * Only the neighbours' links and, if needed, the list head are rewritten;
+ * the entry's own fields are left untouched.
+ */
+static void rlb6_delete_table_entry_dst(struct bonding *bond, u32 index)
+{
+	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
+	struct rlb_client_info *tbl = bond_info->rx6_hashtbl;
+	u32 succ = tbl[index].used_next;
+	u32 pred = tbl[index].used_prev;
+
+	if (bond_info->rx6_hashtbl_used_head == index)
+		bond_info->rx6_hashtbl_used_head = succ;
+
+	if (succ != RLB_NULL_INDEX)
+		tbl[succ].used_prev = pred;
+
+	if (pred != RLB_NULL_INDEX)
+		tbl[pred].used_next = succ;
+}
+
+/* Insert entry @ip_dst_hash at the front of the source chain that hangs off
+ * bucket @ip_src_hash in the IPv6 receive hash table.
+ */
+static void rlb6_src_link(struct bonding *bond, u32 ip_src_hash,
+			  u32 ip_dst_hash)
+{
+	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
+	struct rlb_client_info *tbl = bond_info->rx6_hashtbl;
+	struct rlb_client_info *entry = &tbl[ip_dst_hash];
+	struct rlb_client_info *bucket = &tbl[ip_src_hash];
+	u32 old_first = bucket->src_first;
+
+	entry->src_prev = ip_src_hash;
+	entry->src_next = old_first;
+
+	/* the previous chain head now follows the new entry */
+	if (old_first != RLB_NULL_INDEX)
+		tbl[old_first].src_prev = ip_dst_hash;
+
+	bucket->src_first = ip_dst_hash;
+}
+
+/* Remove entry @index from its source chain in the IPv6 receive hash table.
+ *
+ * The entry's own src_next/src_prev are reset to RLB_NULL_INDEX. The
+ * predecessor is either the bucket that owns the chain (its src_first points
+ * at @index) or an ordinary chain member (its src_next is patched instead).
+ */
+static void rlb6_src_unlink(struct bonding *bond, u32 index)
+{
+	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
+	struct rlb_client_info *tbl = bond_info->rx6_hashtbl;
+	u32 succ = tbl[index].src_next;
+	u32 pred = tbl[index].src_prev;
+
+	tbl[index].src_next = RLB_NULL_INDEX;
+	tbl[index].src_prev = RLB_NULL_INDEX;
+
+	if (succ != RLB_NULL_INDEX)
+		tbl[succ].src_prev = pred;
+
+	if (pred != RLB_NULL_INDEX) {
+		if (tbl[pred].src_first == index)
+			tbl[pred].src_first = succ;
+		else
+			tbl[pred].src_next = succ;
+	}
+}
+
+/* Flag every IPv6 RLB client currently served by @slave for an update.
+ *
+ * Under mode_lock, each used entry that is assigned to @slave and has a
+ * valid client MAC gets ntt set. If at least one entry was flagged, the
+ * bond-wide rx6_ntt flag is raised and the retry counter is reloaded with
+ * RLB_UPDATE_RETRY.
+ */
+static void rlb6_req_update_slave_clients(struct bonding *bond,
+					  struct slave *slave)
+{
+	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
+	bool need_update = false;
+	u32 idx;
+
+	spin_lock_bh(&bond->mode_lock);
+
+	for (idx = bond_info->rx6_hashtbl_used_head; idx != RLB_NULL_INDEX;
+	     idx = bond_info->rx6_hashtbl[idx].used_next) {
+		struct rlb_client_info *entry = &bond_info->rx6_hashtbl[idx];
+
+		if (entry->slave != slave)
+			continue;
+		if (!is_valid_ether_addr(entry->mac_dst))
+			continue;
+
+		entry->ntt = 1;
+		need_update = true;
+	}
+
+	if (need_update) {
+		bond_info->rx6_ntt = 1;
+		bond_info->rlb6_update_retry_counter = RLB_UPDATE_RETRY;
+	}
+
+	spin_unlock_bh(&bond->mode_lock);
+}
+
+static struct slave *rlb6_nd_choose_channel(struct sk_buff *skb,
+					    struct bonding *bond,
+					    struct ipv6hdr *ip6hdr,
+					    u8 type)
+{	/* Pick the slave for an outgoing NS/NA and record the peer in the
+	 * IPv6 receive hash table (rx6_hashtbl).
+	 *
+	 * @skb:    outgoing packet; its transport header is read as nd_msg
+	 *          and its Ethernet header supplies the MAC addresses
+	 * @ip6hdr: IPv6 header of @skb
+	 * @type:   ICMPv6 type of the message
+	 *
+	 * Returns the slave stored in (or newly assigned to) the hash slot,
+	 * or NULL when __rlb_next_rx_slave() has no slave to offer.
+	 * mode_lock is taken with plain spin_lock() for the whole lookup.
+	 */
+	struct nd_msg *msg;
+	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
+	struct slave *assigned_slave, *curr_active_slave;
+	struct rlb_client_info *client_info;
+	struct ethhdr *eth_data;
+	u8 *dst_ip;
+	u32 hash_index = 0;
+
+	spin_lock(&bond->mode_lock);
+
+	msg = (struct nd_msg *)skb_transport_header(skb);
+	eth_data = eth_hdr(skb);
+	curr_active_slave = rcu_dereference(bond->curr_active_slave);
+
+	if (type == NDISC_NEIGHBOUR_SOLICITATION)
+		dst_ip = (u8 *)msg->target.s6_addr; /* NS: the peer is the solicited target */
+	else
+		dst_ip = (u8 *)ip6hdr->daddr.s6_addr; /* otherwise: the IPv6 destination */
+
+	hash_index = _simple_hash(dst_ip,
+				  sizeof(struct in6_addr));
+	client_info = &bond_info->rx6_hashtbl[hash_index];
+
+	if (client_info->assigned) {
+		if (!memcmp(client_info->ip6_dst.s6_addr, dst_ip,
+			    sizeof(struct in6_addr)) &&
+			    !memcmp(client_info->ip6_src.s6_addr,
+			    ip6hdr->saddr.s6_addr,
+			    sizeof(ip6hdr->saddr.s6_addr))) {
+			ether_addr_copy(client_info->mac_src,
+					eth_data->h_source);
+
+			assigned_slave = client_info->slave;
+			if (assigned_slave) { /* known src/dst pair with a slave: reuse it */
+				spin_unlock(&bond->mode_lock);
+				return assigned_slave;
+			}
+		} else { /* slot holds a different src/dst pair: hand the old
+			  * client over to curr_active_slave and advertise
+			  * that before the slot is overwritten below
+			  */
+			if (curr_active_slave &&
+			    curr_active_slave != client_info->slave) {
+				client_info->slave = curr_active_slave;
+				rlb6_update_client(client_info);
+			}
+		}
+	}
+
+	/* assign a new slave */
+	assigned_slave = __rlb_next_rx_slave(bond);
+
+	if (assigned_slave) {
+		if (!(client_info->assigned &&
+		      !memcmp(client_info->ip6_src.s6_addr,
+		      ip6hdr->saddr.s6_addr, sizeof(ip6hdr->saddr.s6_addr)))) { /* new or changed source: move the slot to the chain of the new source hash */
+			u32 hash_src = _simple_hash((u8 *)ip6hdr->saddr.s6_addr,
+						sizeof(ip6hdr->saddr.s6_addr));
+
+			rlb6_src_unlink(bond, hash_index);
+			rlb6_src_link(bond, hash_src, hash_index);
+		}
+
+		memcpy(client_info->ip6_src.s6_addr, ip6hdr->saddr.s6_addr,
+		       sizeof(ip6hdr->saddr.s6_addr));
+		memcpy(client_info->ip6_dst.s6_addr, dst_ip,
+		       sizeof(struct in6_addr));
+
+		ether_addr_copy(client_info->mac_dst, eth_data->h_dest);
+		ether_addr_copy(client_info->mac_src, eth_data->h_source);
+
+		client_info->slave = assigned_slave;
+
+		if (is_valid_ether_addr(client_info->mac_dst)) { /* client MAC usable: schedule an update */
+			client_info->ntt = 1;
+			bond->alb_info.rx6_ntt = 1;
+		} else {
+			client_info->ntt = 0;
+		}
+
+		if (vlan_get_tag(skb, &client_info->vlan_id)) /* no tag could be read: store VLAN id 0 */
+			client_info->vlan_id = 0;
+
+		if (!client_info->assigned) { /* first use of this slot: push it on the head of the used list */
+			u32 prev_tbl_head = bond_info->rx6_hashtbl_used_head;
+
+			bond_info->rx6_hashtbl_used_head = hash_index;
+			client_info->used_next = prev_tbl_head;
+			if (prev_tbl_head != RLB_NULL_INDEX)
+				bond_info->rx6_hashtbl[prev_tbl_head].used_prev = hash_index;
+			client_info->assigned = 1;
+		}
+	}
+
+	spin_unlock(&bond->mode_lock);
+
+	return assigned_slave;
+}
+
+/* Choose the transmit slave for an outgoing IPv6 neighbour solicitation or
+ * advertisement.
+ *
+ * Returns NULL when @skb is not an NS/NA, is too short to hold a full
+ * struct nd_msg, carries no link-layer address option, or when no slave
+ * could be assigned. If the advertised link-layer address does not belong
+ * to one of the bond's slaves, curr_active_slave is returned unchanged.
+ * Otherwise the slave picked by rlb6_nd_choose_channel() is returned after
+ * alb_set_nd_option() has had the chance to rewrite the link-layer option.
+ */
+static struct slave *rlb_nd_xmit(struct sk_buff *skb, struct bonding *bond)
+{
+	struct slave *tx_slave;
+	struct ipv6hdr *ip6hdr;
+	struct icmp6hdr *hdr;
+	u8 *lladdr;
+
+	if (!pskb_network_may_pull(skb, sizeof(*ip6hdr)))
+		return NULL;
+
+	ip6hdr = ipv6_hdr(skb);
+	if (ip6hdr->nexthdr != IPPROTO_ICMPV6)
+		return NULL;
+
+	if (!pskb_network_may_pull(skb, sizeof(*ip6hdr) + sizeof(*hdr)))
+		return NULL;
+
+	hdr = icmp6_hdr(skb);
+	if (hdr->icmp6_type != NDISC_NEIGHBOUR_ADVERTISEMENT &&
+	    hdr->icmp6_type != NDISC_NEIGHBOUR_SOLICITATION)
+		return NULL;
+
+	/* The option walk and the NS target lookup below read a complete
+	 * struct nd_msg, so make sure all of it is in the linear area.
+	 */
+	if (!pskb_network_may_pull(skb, sizeof(*ip6hdr) +
+					sizeof(struct nd_msg)))
+		return NULL;
+
+	/* A successful pull may have reallocated the skb head, which would
+	 * leave the header pointers taken above dangling: reload them.
+	 */
+	ip6hdr = ipv6_hdr(skb);
+	hdr = icmp6_hdr(skb);
+
+	lladdr = alb_get_lladdr(skb);
+	if (!lladdr)
+		return NULL;
+
+	if (!bond_slave_has_mac_rx(bond, lladdr))
+		return rcu_dereference(bond->curr_active_slave);
+
+	tx_slave = rlb6_nd_choose_channel(skb, bond, ip6hdr, hdr->icmp6_type);
+	if (!tx_slave)
+		return NULL;
+
+	alb_set_nd_option(skb, bond, tx_slave);
+
+	return tx_slave;
+}
+
+/* Refresh the client MAC of an IPv6 RLB entry from a received neighbour
+ * advertisement.
+ *
+ * @ip6hdr: IPv6 header of the received packet
+ * @lladdr: link-layer address taken from the packet's ND option
+ *
+ * The slot is looked up by the hash of the packet's source address. It is
+ * updated only if it is assigned, its ip6_dst equals the packet source, its
+ * ip6_src equals the packet destination, and the stored MAC differs from
+ * @lladdr. No update is scheduled towards the client.
+ */
+static void rlb6_update_entry_from_na(struct bonding *bond,
+				      struct ipv6hdr *ip6hdr, u8 *lladdr)
+{
+	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
+	struct rlb_client_info *client_info;
+	u32 hash_index;
+
+	spin_lock_bh(&bond->mode_lock);
+
+	hash_index = _simple_hash(ip6hdr->saddr.s6_addr,
+				  sizeof(ip6hdr->saddr.s6_addr));
+	client_info = &bond_info->rx6_hashtbl[hash_index];
+
+	/* @lladdr points into packet data, so neither the two pad bytes that
+	 * ether_addr_equal_64bits() reads nor any alignment can be relied on:
+	 * use the unaligned, exact-length comparison.
+	 */
+	if (client_info->assigned &&
+	    !memcmp(ip6hdr->saddr.s6_addr, client_info->ip6_dst.s6_addr,
+		    sizeof(ip6hdr->saddr.s6_addr)) &&
+	    !memcmp(ip6hdr->daddr.s6_addr, client_info->ip6_src.s6_addr,
+		    sizeof(ip6hdr->daddr.s6_addr)) &&
+	    !ether_addr_equal_unaligned(client_info->mac_dst, lladdr))
+		memcpy(client_info->mac_dst, lladdr,
+		       sizeof(client_info->mac_dst));
 	spin_unlock_bh(&bond->mode_lock);
 }
 
+/* Remove entry @index from the IPv6 receive hash table: take it off the
+ * used list, reset its destination-side fields and unlink it from its
+ * source chain.
+ */
+static void rlb6_delete_table_entry(struct bonding *bond, u32 index)
+{
+	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
+	/* must be the IPv6 table; resetting rx_hashtbl[index] here would wipe
+	 * an unrelated IPv4 entry and leave the IPv6 one marked as in use
+	 */
+	struct rlb_client_info *entry = &bond_info->rx6_hashtbl[index];
+
+	rlb6_delete_table_entry_dst(bond, index);
+	rlb_init_table_entry_dst(entry);
+	rlb6_src_unlink(bond, index);
+}
+
+/* Drop IPv6 RLB entries whose recorded source is contradicted by a received
+ * ND packet.
+ *
+ * @ip6hdr: IPv6 header of the received packet
+ * @lladdr: link-layer address taken from the packet's ND option
+ *
+ * Walks the source chain of the bucket selected by the hash of the packet's
+ * source address and deletes every entry whose ip6_src equals that address
+ * while its mac_src differs from @lladdr.
+ */
+static void rlb6_purge_src_ip(struct bonding *bond, struct ipv6hdr *ip6hdr,
+			      u8 *lladdr)
+{
+	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
+	struct rlb_client_info *client_info;
+	u32 ip_src_hash = _simple_hash(ip6hdr->saddr.s6_addr,
+				       sizeof(ip6hdr->saddr.s6_addr));
+	u32 index, next_index;
+
+	spin_lock_bh(&bond->mode_lock);
+
+	index = bond_info->rx6_hashtbl[ip_src_hash].src_first;
+	while (index != RLB_NULL_INDEX) {
+		client_info = &bond_info->rx6_hashtbl[index];
+		/* fetch the successor first: deleting the entry resets its
+		 * chain links
+		 */
+		next_index = client_info->src_next;
+
+		/* @lladdr points into packet data, which has no guaranteed
+		 * padding or alignment, so ether_addr_equal_64bits() is not
+		 * safe to use on it
+		 */
+		if (!memcmp(client_info->ip6_src.s6_addr,
+			    ip6hdr->saddr.s6_addr,
+			    sizeof(ip6hdr->saddr.s6_addr)) &&
+		    !ether_addr_equal_unaligned(lladdr, client_info->mac_src))
+			rlb6_delete_table_entry(bond, index);
+
+		index = next_index;
+	}
+
+	spin_unlock_bh(&bond->mode_lock);
+}
+
+/* Update the IPv6 RLB table from a received neighbour discovery packet.
+ *
+ * Packets whose source address ipv6_get_ifaddr() finds in the receiving
+ * device's namespace are ignored, as are packets too short for a struct
+ * nd_msg or without a link-layer address option. For the rest, stale
+ * entries for the source are purged and, for a neighbour advertisement, the
+ * matching entry's client MAC is refreshed.
+ *
+ * Always returns RX_HANDLER_ANOTHER.
+ */
+static int rlb_nd_recv(struct sk_buff *skb, struct bonding *bond)
+{
+	struct inet6_ifaddr *ifp;
+	struct ipv6hdr *ip6hdr;
+	struct nd_msg *msg;
+	u8 *lladdr;
+
+	if (!pskb_network_may_pull(skb, sizeof(*ip6hdr)))
+		return RX_HANDLER_ANOTHER;
+
+	ip6hdr = ipv6_hdr(skb);
+
+	ifp = ipv6_get_ifaddr(dev_net(skb->dev), &ip6hdr->saddr, NULL, 0);
+	if (ifp) {
+		in6_ifa_put(ifp);
+		return RX_HANDLER_ANOTHER;
+	}
+
+	if (!pskb_network_may_pull(skb, sizeof(struct ipv6hdr) +
+				sizeof(struct nd_msg)))
+		return RX_HANDLER_ANOTHER;
+
+	/* The pull above may have reallocated the skb head; the IPv6 header
+	 * pointer taken earlier must not be used any more, so reload it.
+	 */
+	ip6hdr = ipv6_hdr(skb);
+	msg = (struct nd_msg *)skb_transport_header(skb);
+
+	lladdr = alb_get_lladdr(skb);
+	if (!lladdr)
+		return RX_HANDLER_ANOTHER;
+
+	rlb6_purge_src_ip(bond, ip6hdr, lladdr);
+
+	if (msg->icmph.icmp6_type == NDISC_NEIGHBOUR_ADVERTISEMENT)
+		rlb6_update_entry_from_na(bond, ip6hdr, lladdr);
+
+	return RX_HANDLER_ANOTHER;
+}
+
+/* Receive probe for RLB: ARP frames go to rlb_arp_recv(), IPv6 neighbour
+ * discovery packets go to rlb_nd_recv(), and everything else is passed on
+ * with RX_HANDLER_ANOTHER.
+ */
+static int rlb_recv(struct sk_buff *skb, struct bonding *bond,
+		    struct slave *slave)
+{
+	if (skb->protocol == cpu_to_be16(ETH_P_ARP))
+		return rlb_arp_recv(skb, bond, slave);
+
+	if (!alb_determine_ipv6_nd(skb, bond))
+		return RX_HANDLER_ANOTHER;
+
+	return rlb_nd_recv(skb, bond);
+}
+
 /*********************** tlb/rlb shared functions *********************/
 
 static void alb_send_lp_vid(struct slave *slave, const u8 mac_addr[],
@@ -1068,6 +1524,7 @@ static void alb_fasten_mac_swap(struct bonding *bond, struct slave *slave1,
 			 * has changed
 			 */
 			rlb_req_update_slave_clients(bond, slave1);
+			rlb6_req_update_slave_clients(bond, slave1);
 		}
 	} else {
 		disabled_slave = slave1;
@@ -1080,6 +1537,7 @@ static void alb_fasten_mac_swap(struct bonding *bond, struct slave *slave1,
 			 * has changed
 			 */
 			rlb_req_update_slave_clients(bond, slave2);
+			rlb6_req_update_slave_clients(bond, slave2);
 		}
 	} else {
 		disabled_slave = slave2;
@@ -1291,6 +1749,111 @@ static bool alb_determine_nd(struct sk_buff *skb, struct bonding *bond)
 		hdr->icmp6_type == NDISC_NEIGHBOUR_SOLICITATION;
 }
 
+/* Report whether @skb is an IPv6 packet that alb_determine_nd() classifies
+ * as neighbour discovery.
+ *
+ * Non-IPv6 packets yield false. When a VLAN tag is present, the transport
+ * header offset is first set to directly follow a fixed-size IPv6 header.
+ */
+static bool alb_determine_ipv6_nd(struct sk_buff *skb, struct bonding *bond)
+{
+	if (skb->protocol != htons(ETH_P_IPV6))
+		return false;
+
+	if (skb_vlan_tag_present(skb))
+		skb->transport_header = skb->network_header +
+					sizeof(struct ipv6hdr);
+
+	return alb_determine_nd(skb, bond);
+}
+
+/* Overwrite the link-layer address carried in the ND options of @skb.
+ *
+ * @skb:  packet whose transport header is a struct nd_msg
+ * @data: new link-layer address, dev->addr_len bytes long
+ *
+ * The first source/target link-layer address option accepted by
+ * ndisc_opt_addr_data() is rewritten and the ICMPv6 checksum is recomputed.
+ * Nothing is changed when the options are truncated, malformed (zero or
+ * overlong length) or contain no such option.
+ *
+ * NOTE(review): the packet data is written in place without first making
+ * the skb writable, and the checksum covers payload_len bytes starting at
+ * the ICMPv6 header, which presumes they are all in the linear area --
+ * confirm that the callers guarantee both.
+ */
+static void alb_change_nd_option(struct sk_buff *skb, const void *data)
+{
+	struct nd_msg *msg = (struct nd_msg *)skb_transport_header(skb);
+	struct nd_opt_hdr *nd_opt = (struct nd_opt_hdr *)msg->opt;
+	unsigned char *opt_start = skb_transport_header(skb) +
+				   offsetof(struct nd_msg, opt);
+	struct net_device *dev = skb->dev;
+	struct icmp6hdr *icmp6h = icmp6_hdr(skb);
+	struct ipv6hdr *ip6hdr = ipv6_hdr(skb);
+	u32 ndoptlen;
+
+	/* Linear data ends before the option area starts: the unsigned
+	 * subtraction below would wrap around to a huge length.
+	 */
+	if (skb_tail_pointer(skb) < opt_start)
+		return;
+
+	ndoptlen = skb_tail_pointer(skb) - opt_start;
+
+	/* only look at an option once its two-byte header is fully present */
+	while (ndoptlen >= sizeof(*nd_opt)) {
+		u32 l = nd_opt->nd_opt_len << 3;
+		u8 *lladdr = NULL;
+
+		if (l == 0 || ndoptlen < l)
+			return;
+
+		switch (nd_opt->nd_opt_type) {
+		case ND_OPT_SOURCE_LL_ADDR:
+		case ND_OPT_TARGET_LL_ADDR:
+			lladdr = ndisc_opt_addr_data(nd_opt, dev);
+			break;
+
+		default:
+			break;
+		}
+
+		if (lladdr) {
+			memcpy(lladdr, data, dev->addr_len);
+
+			/* the checksum field must be zero while summing */
+			icmp6h->icmp6_cksum = 0;
+			icmp6h->icmp6_cksum = csum_ipv6_magic(&ip6hdr->saddr,
+							      &ip6hdr->daddr,
+						ntohs(ip6hdr->payload_len),
+						IPPROTO_ICMPV6,
+						csum_partial(icmp6h,
+							     ntohs(ip6hdr->payload_len),
+							     0));
+			return;
+		}
+
+		ndoptlen -= l;
+		nd_opt = ((void *)nd_opt) + l;
+	}
+}
+
+/* Find the link-layer address carried in the ND options of @skb.
+ *
+ * The transport header of @skb is interpreted as a struct nd_msg and the
+ * options in the linear data behind it are walked. Returns a pointer into
+ * the packet data for the first source/target link-layer address option
+ * accepted by ndisc_opt_addr_data(), or NULL when the options are
+ * truncated, malformed (zero or overlong length) or contain no such option.
+ */
+static u8 *alb_get_lladdr(struct sk_buff *skb)
+{
+	struct nd_msg *msg = (struct nd_msg *)skb_transport_header(skb);
+	struct nd_opt_hdr *nd_opt = (struct nd_opt_hdr *)msg->opt;
+	unsigned char *opt_start = skb_transport_header(skb) +
+				   offsetof(struct nd_msg, opt);
+	struct net_device *dev = skb->dev;
+	u32 ndoptlen;
+
+	/* Linear data ends before the option area starts: the unsigned
+	 * subtraction below would wrap around to a huge length.
+	 */
+	if (skb_tail_pointer(skb) < opt_start)
+		return NULL;
+
+	ndoptlen = skb_tail_pointer(skb) - opt_start;
+
+	/* only look at an option once its two-byte header is fully present */
+	while (ndoptlen >= sizeof(*nd_opt)) {
+		u32 l = nd_opt->nd_opt_len << 3;
+		u8 *lladdr;
+
+		if (l == 0 || ndoptlen < l)
+			return NULL;
+
+		switch (nd_opt->nd_opt_type) {
+		case ND_OPT_SOURCE_LL_ADDR:
+		case ND_OPT_TARGET_LL_ADDR:
+			lladdr = ndisc_opt_addr_data(nd_opt, dev);
+			if (lladdr)
+				return lladdr;
+			break;
+
+		default:
+			break;
+		}
+
+		ndoptlen -= l;
+		nd_opt = ((void *)nd_opt) + l;
+	}
+
+	return NULL;
+}
+
+/* Rewrite the link-layer address option of an outgoing ND packet to
+ * @tx_slave's MAC address. Nothing is done when @tx_slave is the current
+ * active slave or when alb_determine_nd() does not classify @skb as ND.
+ */
+static void alb_set_nd_option(struct sk_buff *skb, struct bonding *bond,
+			      struct slave *tx_slave)
+{
+	if (tx_slave == rcu_access_pointer(bond->curr_active_slave))
+		return;
+
+	if (!alb_determine_nd(skb, bond))
+		return;
+
+	alb_change_nd_option(skb, tx_slave->dev->dev_addr);
+}
+
 /************************ exported alb functions ************************/
 
 int bond_alb_initialize(struct bonding *bond, int rlb_enabled)
@@ -1457,12 +2020,17 @@ struct slave *bond_xmit_alb_slave_get(struct bonding *bond,
 			break;
 		}
 
-		if (alb_determine_nd(skb, bond)) {
+		tx_slave = rlb_nd_xmit(skb, bond);
+		if (tx_slave) {
+			do_tx_balance = false;
+			break;
+		}
+
+		if (!pskb_network_may_pull(skb, sizeof(*ip6hdr))) {
 			do_tx_balance = false;
 			break;
 		}
 
-		/* The IPv6 header is pulled by alb_determine_nd */
 		/* Additionally, DAD probes should not be tx-balanced as that
 		 * will lead to false positives for duplicate addresses and
 		 * prevent address configuration from working.
@@ -1612,6 +2180,17 @@ void bond_alb_monitor(struct work_struct *work)
 					bond_info->rx_ntt = 0;
 			}
 		}
+		if (bond_info->rx6_ntt) {
+			if (bond_info->rlb6_update_delay_counter) {
+				--bond_info->rlb6_update_delay_counter;
+			} else {
+				rlb6_update_rx_clients(bond);
+				if (bond_info->rlb6_update_retry_counter)
+					--bond_info->rlb6_update_retry_counter;
+				else
+					bond_info->rx6_ntt = 0;
+			}
+		}
 	}
 	rcu_read_unlock();
 re_arm:
@@ -1812,6 +2391,7 @@ int bond_alb_set_mac_address(struct net_device *bond_dev, void *addr)
 		if (bond->alb_info.rlb_enabled) {
 			/* inform clients mac address has changed */
 			rlb_req_update_slave_clients(bond, curr_active);
+			rlb6_req_update_slave_clients(bond, curr_active);
 		}
 	}
 
diff --git a/drivers/net/bonding/bond_debugfs.c b/drivers/net/bonding/bond_debugfs.c
index 4f9b4a18c74c..90e88ff9b2bf 100644
--- a/drivers/net/bonding/bond_debugfs.c
+++ b/drivers/net/bonding/bond_debugfs.c
@@ -41,6 +41,20 @@ static int bond_debug_rlb_hash_show(struct seq_file *m, void *v)
 			client_info->slave->dev->name);
 	}
 
+	seq_puts(m, "SourceIP                                 DestinationIP                           Destination MAC   Src MAC           DEV\n");
+
+	hash_index = bond_info->rx6_hashtbl_used_head;
+	for (; hash_index != RLB_NULL_INDEX;
+	     hash_index = client_info->used_next) {
+		client_info = &bond_info->rx6_hashtbl[hash_index];
+		seq_printf(m, "%-40pI6 %-40pI6 %-17pM %-17pM %s\n",
+			   &client_info->ip6_src,
+			   &client_info->ip6_dst,
+			   &client_info->mac_dst,
+			   &client_info->mac_src,
+			   client_info->slave->dev->name);
+	}
+
 	spin_unlock_bh(&bond->mode_lock);
 
 	return 0;
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 15eddca7b4b6..b6252b181940 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -1510,8 +1510,8 @@ static rx_handler_result_t bond_handle_frame(struct sk_buff **pskb)
 	struct sk_buff *skb = *pskb;
 	struct slave *slave;
 	struct bonding *bond;
-	int (*recv_probe)(const struct sk_buff *, struct bonding *,
-			  struct slave *);
+	int (*recv_probe)(struct sk_buff *skb, struct bonding *bond,
+			  struct slave *slave);
 	int ret = RX_HANDLER_ANOTHER;
 
 	skb = skb_share_check(skb, GFP_ATOMIC);
@@ -3228,7 +3228,7 @@ static int bond_na_rcv(const struct sk_buff *skb, struct bonding *bond,
 }
 #endif
 
-int bond_rcv_validate(const struct sk_buff *skb, struct bonding *bond,
+int bond_rcv_validate(struct sk_buff *skb, struct bonding *bond,
 		      struct slave *slave)
 {
 #if IS_ENABLED(CONFIG_IPV6)
diff --git a/drivers/net/usb/cdc_mbim.c b/drivers/net/usb/cdc_mbim.c
index c89639381eca..70f4327dbd2a 100644
--- a/drivers/net/usb/cdc_mbim.c
+++ b/drivers/net/usb/cdc_mbim.c
@@ -347,7 +347,7 @@ static void do_neigh_solicit(struct usbnet *dev, u8 *buf, u16 tci)
 				 is_router /* router */,
 				 true /* solicited */,
 				 false /* override */,
-				 true /* inc_opt */);
+				 true /* inc_opt */, NULL);
 out:
 	dev_put(netdev);
 }
diff --git a/include/net/bond_3ad.h b/include/net/bond_3ad.h
index 184105d68294..51886d9c928d 100644
--- a/include/net/bond_3ad.h
+++ b/include/net/bond_3ad.h
@@ -300,7 +300,7 @@ void bond_3ad_handle_link_change(struct slave *slave, char link);
 int  bond_3ad_get_active_agg_info(struct bonding *bond, struct ad_info *ad_info);
 int  __bond_3ad_get_active_agg_info(struct bonding *bond,
 				    struct ad_info *ad_info);
-int bond_3ad_lacpdu_recv(const struct sk_buff *skb, struct bonding *bond,
+int bond_3ad_lacpdu_recv(struct sk_buff *skb, struct bonding *bond,
 			 struct slave *slave);
 int bond_3ad_set_carrier(struct bonding *bond);
 void bond_3ad_update_lacp_active(struct bonding *bond);
diff --git a/include/net/bond_alb.h b/include/net/bond_alb.h
index 191c36afa1f4..b1a572eead31 100644
--- a/include/net/bond_alb.h
+++ b/include/net/bond_alb.h
@@ -94,6 +94,8 @@ struct tlb_client_info {
 struct rlb_client_info {
 	__be32 ip_src;		/* the server IP address */
 	__be32 ip_dst;		/* the client IP address */
+	struct in6_addr	ip6_src;
+	struct in6_addr	ip6_dst;
 	u8  mac_src[ETH_ALEN];	/* the server MAC address */
 	u8  mac_dst[ETH_ALEN];	/* the client MAC address */
 
@@ -131,10 +133,13 @@ struct alb_bond_info {
 	/* -------- rlb parameters -------- */
 	int rlb_enabled;
 	struct rlb_client_info	*rx_hashtbl;	/* Receive hash table */
+	struct rlb_client_info	*rx6_hashtbl;	/* Receive hash table */
 	u32			rx_hashtbl_used_head;
+	u32			rx6_hashtbl_used_head;
 	u8			rx_ntt;	/* flag - need to transmit
 					 * to all rx clients
 					 */
+	u8			rx6_ntt;
 	struct slave		*rx_slave;/* last slave to xmit from */
 	u8			primary_is_promisc;	   /* boolean */
 	u32			rlb_promisc_timeout_counter;/* counts primary
@@ -144,6 +149,8 @@ struct alb_bond_info {
 	u32			rlb_update_retry_counter;/* counter of retries
 							  * of client update
 							  */
+	u32			rlb6_update_delay_counter;
+	u32			rlb6_update_retry_counter;
 	u8			rlb_rebalance;	/* flag - indicates that the
 						 * rx traffic should be
 						 * rebalanced
diff --git a/include/net/bonding.h b/include/net/bonding.h
index b14f4c0b4e9e..552bce0168d1 100644
--- a/include/net/bonding.h
+++ b/include/net/bonding.h
@@ -220,8 +220,8 @@ struct bonding {
 	struct   bond_up_slave __rcu *all_slaves;
 	bool     force_primary;
 	s32      slave_cnt; /* never change this value outside the attach/detach wrappers */
-	int     (*recv_probe)(const struct sk_buff *, struct bonding *,
-			      struct slave *);
+	int     (*recv_probe)(struct sk_buff *skb, struct bonding *bond,
+			      struct slave *slave);
 	/* mode_lock is used for mode-specific locking needs, currently used by:
 	 * 3ad mode (4) - protect against running bond_3ad_unbind_slave() and
 	 *                bond_3ad_state_machine_handler() concurrently and also
@@ -639,7 +639,7 @@ struct bond_net {
 	struct class_attribute	class_attr_bonding_masters;
 };
 
-int bond_rcv_validate(const struct sk_buff *skb, struct bonding *bond, struct slave *slave);
+int bond_rcv_validate(struct sk_buff *skb, struct bonding *bond, struct slave *slave);
 netdev_tx_t bond_dev_queue_xmit(struct bonding *bond, struct sk_buff *skb, struct net_device *slave_dev);
 int bond_create(struct net *net, const char *name);
 int bond_create_sysfs(struct bond_net *net);
diff --git a/include/net/ipv6_stubs.h b/include/net/ipv6_stubs.h
index 45e0339be6fa..2b64ea6590b6 100644
--- a/include/net/ipv6_stubs.h
+++ b/include/net/ipv6_stubs.h
@@ -56,7 +56,8 @@ struct ipv6_stub {
 	void (*udpv6_encap_enable)(void);
 	void (*ndisc_send_na)(struct net_device *dev, const struct in6_addr *daddr,
 			      const struct in6_addr *solicited_addr,
-			      bool router, bool solicited, bool override, bool inc_opt);
+			      bool router, bool solicited, bool override,
+			      bool inc_opt, void *data);
 #if IS_ENABLED(CONFIG_XFRM)
 	void (*xfrm6_local_rxpmtu)(struct sk_buff *skb, u32 mtu);
 	int (*xfrm6_udp_encap_rcv)(struct sock *sk, struct sk_buff *skb);
diff --git a/include/net/ndisc.h b/include/net/ndisc.h
index da7eec8669ec..e71702a44a3d 100644
--- a/include/net/ndisc.h
+++ b/include/net/ndisc.h
@@ -107,6 +107,12 @@ struct nd_opt_hdr {
 	__u8		nd_opt_len;
 } __packed;
 
+struct nd_sendinfo {	/* optional link-layer overrides handed to ndisc_send_na() through its data argument */
+	__u16 vlanid;	/* when non-zero, ndisc_send_na() tags the packet with this 802.1Q VLAN id */
+	void *mac_dst;	/* destination MAC; the bonding caller passes the client's MAC */
+	const void *mac_src;	/* source MAC; the bonding caller passes the sending slave's dev_addr */
+};
+
 /* ND options */
 struct ndisc_options {
 	struct nd_opt_hdr *nd_opt_array[__ND_OPT_ARRAY_MAX];
@@ -460,7 +466,8 @@ void ndisc_send_rs(struct net_device *dev,
 		   const struct in6_addr *saddr, const struct in6_addr *daddr);
 void ndisc_send_na(struct net_device *dev, const struct in6_addr *daddr,
 		   const struct in6_addr *solicited_addr,
-		   bool router, bool solicited, bool override, bool inc_opt);
+		   bool router, bool solicited, bool override, bool inc_opt,
+		   void *data);
 
 void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target);
 
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index b22504176588..6825d70c34fb 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -975,6 +975,7 @@ void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp)
 
 	kfree_rcu(ifp, rcu);
 }
+EXPORT_SYMBOL(inet6_ifa_finish_destroy);
 
 static void
 ipv6_link_dev_addr(struct inet6_dev *idev, struct inet6_ifaddr *ifp)
@@ -2037,6 +2038,7 @@ struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net, const struct in6_addr *add
 
 	return result;
 }
+EXPORT_SYMBOL(ipv6_get_ifaddr);
 
 /* Gets referenced address, destroys ifaddr */
 
@@ -4217,7 +4219,7 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp, bool bump_id,
 		ndisc_send_na(dev, &in6addr_linklocal_allnodes, &ifp->addr,
 			      /*router=*/ !!ifp->idev->cnf.forwarding,
 			      /*solicited=*/ false, /*override=*/ true,
-			      /*inc_opt=*/ true);
+			      /*inc_opt=*/ true, NULL);
 	}
 
 	if (send_rs) {
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index fcb288b0ae13..47875aab86e5 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -518,29 +518,37 @@ EXPORT_SYMBOL(ndisc_send_skb);
 
 void ndisc_send_na(struct net_device *dev, const struct in6_addr *daddr,
 		   const struct in6_addr *solicited_addr,
-		   bool router, bool solicited, bool override, bool inc_opt)
+		   bool router, bool solicited, bool override, bool inc_opt,
+		   void *data)
 {
 	struct sk_buff *skb;
 	struct in6_addr tmpaddr;
 	struct inet6_ifaddr *ifp;
 	const struct in6_addr *src_addr;
 	struct nd_msg *msg;
+	struct nd_sendinfo *sendinfo = data;
+	struct net *net = dev_net(dev);
+	struct sock *sk = net->ipv6.ndisc_sk;
 	int optlen = 0;
 
-	/* for anycast or proxy, solicited_addr != src_addr */
-	ifp = ipv6_get_ifaddr(dev_net(dev), solicited_addr, dev, 1);
-	if (ifp) {
-		src_addr = solicited_addr;
-		if (ifp->flags & IFA_F_OPTIMISTIC)
-			override = false;
-		inc_opt |= ifp->idev->cnf.force_tllao;
-		in6_ifa_put(ifp);
+	if (!sendinfo) {
+		/* for anycast or proxy, solicited_addr != src_addr */
+		ifp = ipv6_get_ifaddr(dev_net(dev), solicited_addr, dev, 1);
+		if (ifp) {
+			src_addr = solicited_addr;
+			if (ifp->flags & IFA_F_OPTIMISTIC)
+				override = false;
+			inc_opt |= ifp->idev->cnf.force_tllao;
+			in6_ifa_put(ifp);
+		} else {
+			if (ipv6_dev_get_saddr(dev_net(dev), dev, daddr,
+					       inet6_sk(dev_net(dev)->ipv6.ndisc_sk)->srcprefs,
+					       &tmpaddr))
+				return;
+			src_addr = &tmpaddr;
+		}
 	} else {
-		if (ipv6_dev_get_saddr(dev_net(dev), dev, daddr,
-				       inet6_sk(dev_net(dev)->ipv6.ndisc_sk)->srcprefs,
-				       &tmpaddr))
-			return;
-		src_addr = &tmpaddr;
+		src_addr = solicited_addr;
 	}
 
 	if (!dev->addr_len)
@@ -568,8 +576,28 @@ void ndisc_send_na(struct net_device *dev, const struct in6_addr *daddr,
 		ndisc_fill_addr_option(skb, ND_OPT_TARGET_LL_ADDR,
 				       dev->dev_addr,
 				       NDISC_NEIGHBOUR_ADVERTISEMENT);
+	if (!sendinfo) {
+		ndisc_send_skb(skb, daddr, src_addr);
+	} else {
+		if (sendinfo->vlanid)
+			__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
+					       sendinfo->vlanid);
+
+		msg->icmph.icmp6_cksum = csum_ipv6_magic(src_addr, daddr, skb->len,
+							 IPPROTO_ICMPV6,
+							 csum_partial(&msg->icmph,
+								      skb->len, 0));
 
-	ndisc_send_skb(skb, daddr, src_addr);
+		ip6_nd_hdr(skb, src_addr, daddr, inet6_sk(sk)->hop_limit, skb->len);
+
+		skb->protocol = htons(ETH_P_IPV6);
+		skb->dev = dev;
+		if (dev_hard_header(skb, dev, ETH_P_IPV6, sendinfo->mac_dst,
+				    sendinfo->mac_src, skb->len) < 0)
+			return;
+
+		dev_queue_xmit(skb);
+	}
 }
 
 static void ndisc_send_unsol_na(struct net_device *dev)
@@ -591,7 +619,7 @@ static void ndisc_send_unsol_na(struct net_device *dev)
 		ndisc_send_na(dev, &in6addr_linklocal_allnodes, &ifa->addr,
 			      /*router=*/ !!idev->cnf.forwarding,
 			      /*solicited=*/ false, /*override=*/ true,
-			      /*inc_opt=*/ true);
+			      /*inc_opt=*/ true, NULL);
 	}
 	read_unlock_bh(&idev->lock);
 
@@ -932,7 +960,7 @@ static void ndisc_recv_ns(struct sk_buff *skb)
 
 	if (dad) {
 		ndisc_send_na(dev, &in6addr_linklocal_allnodes, &msg->target,
-			      !!is_router, false, (ifp != NULL), true);
+			      !!is_router, false, ifp, true, NULL);
 		goto out;
 	}
 
@@ -954,7 +982,7 @@ static void ndisc_recv_ns(struct sk_buff *skb)
 			     NDISC_NEIGHBOUR_SOLICITATION, &ndopts);
 	if (neigh || !dev->header_ops) {
 		ndisc_send_na(dev, saddr, &msg->target, !!is_router,
-			      true, (ifp != NULL && inc), inc);
+			      true, (ifp && inc), inc, NULL);
 		if (neigh)
 			neigh_release(neigh);
 	}

base-commit: c84d86a0295c24487db5b7db1a61d9c0eddfbb66
-- 
2.27.0


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH v4] net:bonding:Add support for IPV6 RLB to balance-alb mode
  2022-03-17  6:15 [PATCH v4] net:bonding:Add support for IPV6 RLB to balance-alb mode Sun Shouxin
@ 2022-03-17  8:11 ` Jiri Pirko
  2022-03-18  9:49   ` 孙守鑫
  2022-03-17 18:49 ` David Ahern
  1 sibling, 1 reply; 10+ messages in thread
From: Jiri Pirko @ 2022-03-17  8:11 UTC (permalink / raw)
  To: Sun Shouxin
  Cc: j.vosburgh, vfalico, andy, davem, kuba, yoshfuji, dsahern,
	oliver, netdev, linux-kernel, huyd12

Thu, Mar 17, 2022 at 07:15:21AM CET, sunshouxin@chinatelecom.cn wrote:
>This patch is implementing IPV6 RLB for balance-alb mode.
>
>Suggested-by: Hu Yadi <huyd12@chinatelecom.cn>
>Signed-off-by: Sun Shouxin <sunshouxin@chinatelecom.cn>


Could you please reply to my question I asked for v1:
Out of curiosity, what is exactly your usecase? I'm asking because
I don't see any good reason to use RLB/ALB modes. I have to be missing
something.

This is adding a lot of code in bonding that needs to be maintained.
However, if there is no particular need to add it, why would we?

Could you please spell out exactly why you need this? I'm pretty sure
that in the end we'll find out that you really don't need this at all.

Thanks!


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v4] net:bonding:Add support for IPV6 RLB to balance-alb mode
  2022-03-17  6:15 [PATCH v4] net:bonding:Add support for IPV6 RLB to balance-alb mode Sun Shouxin
  2022-03-17  8:11 ` Jiri Pirko
@ 2022-03-17 18:49 ` David Ahern
  2022-03-17 20:10   ` Jay Vosburgh
  2022-03-18  9:50   ` 孙守鑫
  1 sibling, 2 replies; 10+ messages in thread
From: David Ahern @ 2022-03-17 18:49 UTC (permalink / raw)
  To: Sun Shouxin, j.vosburgh, vfalico, andy, davem, kuba, yoshfuji, oliver
  Cc: netdev, linux-kernel, huyd12

On 3/17/22 12:15 AM, Sun Shouxin wrote:
> This patch is implementing IPV6 RLB for balance-alb mode.
> 
> Suggested-by: Hu Yadi <huyd12@chinatelecom.cn>
> Signed-off-by: Sun Shouxin <sunshouxin@chinatelecom.cn>
> ---
> changelog:
> v1-->v2:
> -Remove ndisc_bond_send_na and refactor ndisc_send_na.
> -In rlb_nd_xmit, if the lladdr is not local, return curr_active_slave.
> -Don't send neighbor advertisement message when receiving
>  neighbor advertisement message in rlb6_update_entry_from_na.
> 
> v2-->v3:
> -Don't export ndisc_send_na.
> -Use ipv6_stub->ndisc_send_na to replace ndisc_send_na
>  in rlb6_update_client.
> 
> v3-->v4:
> -Submit all code at a whole patch.

you misunderstood Jakub's comment. The code should evolve with small,
focused patches and each patch needs to compile and function correctly
(ie., no breakage).

You need to respond to Jiri's question about why this feature is needed.
After that:

1. patch 1 adds void *data to ndisc_send_na stub function and
ndisc_send_na direct function. Update all places that use both
ndisc_send_na to pass NULL as the data parameter.

2. patch 2 refactors ndisc_send_na to handle the new data argument

3. patch 3 exports any IPv6 functions. explain why each needs to be
exported.

4. patch 4 .... bonding changes. (bonding folks can respond on how to
introduce that change).


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v4] net:bonding:Add support for IPV6 RLB to balance-alb mode
  2022-03-17 18:49 ` David Ahern
@ 2022-03-17 20:10   ` Jay Vosburgh
  2022-03-18  9:53     ` 孙守鑫
  2022-03-18  9:50   ` 孙守鑫
  1 sibling, 1 reply; 10+ messages in thread
From: Jay Vosburgh @ 2022-03-17 20:10 UTC (permalink / raw)
  To: David Ahern
  Cc: Sun Shouxin, vfalico, andy, davem, kuba, yoshfuji, oliver,
	netdev, linux-kernel, huyd12

David Ahern <dsahern@kernel.org> wrote:

>On 3/17/22 12:15 AM, Sun Shouxin wrote:
>> This patch is implementing IPV6 RLB for balance-alb mode.
>> 
>> Suggested-by: Hu Yadi <huyd12@chinatelecom.cn>
>> Signed-off-by: Sun Shouxin <sunshouxin@chinatelecom.cn>
>> ---
>> changelog:
>> v1-->v2:
>> -Remove ndisc_bond_send_na and refactor ndisc_send_na.
>> -In rlb_nd_xmit, if the lladdr is not local, return curr_active_slave.
>> -Don't send neighbor advertisement message when receiving
>>  neighbor advertisement message in rlb6_update_entry_from_na.
>> 
>> v2-->v3:
>> -Don't export ndisc_send_na.
>> -Use ipv6_stub->ndisc_send_na to replace ndisc_send_na
>>  in rlb6_update_client.
>> 
>> v3-->v4:
>> -Submit all code at a whole patch.
>
>you misunderstood Jakub's comment. The code should evolve with small,
>focused patches and each patch needs to compile and function correctly
>(ie., no breakage).

	Agreed; the split of the patches was not at issue, it was that
each patch in a series must compile and the built kernel must function
rationally.

>You need to respond to Jiri's question about why this feature is needed.

	I'm not entirely sold on adding IPv6 RLB for balance-alb, but
the IPv4 version of it does see moderate levels of use, even now.  It's
less common than LACP by far, though.  I'd like to know why someone
would choose IPv6 RLB over LACP.  I wonder if this is a checklist item
somewhere that something must have "complete support for IPv6" or words
to that effect, versus an actual functional need.

>After that:
>
>1. patch 1 adds void *data to ndisc_send_na stub function and
>ndisc_send_na direct function. Update all places that use both
>ndisc_send_na to pass NULL as the data parameter.
>
>2. patch 2 refactors ndisc_send_na to handle the new data argument
>
>3. patch 3 exports any IPv6 functions. explain why each needs to be
>exported.
>
>4. patch 4 .... bonding changes. (bonding folks can respond on how to
>introduce that change).

	Looking at the previous patch for bonding, my two initial
requests are:

	1) A more detailed commit message.  The only way to understand
how any of this actually works is reading the code, there is no higher
level description.

	2) How does this interact with the IPv4 RLB logic?  Is it
possible for a given bond interface MAC to be "assigned" to two
different peers (one IPv4, one IPv6), and if so, does that behave in an
expected manner?  I.e., two peers on the network could receive
contradictory information via ARP and ND for the MAC address of a given
peer.  This is already possible with the IPv4 RLB, but with an
additional IPv6 RLB, a single peer could see two different MACs for a
given host (one via IPv4, one via IPv6), and another peer could see the
opposite, or even disjoint information across several peers.

	-J

---
	-Jay Vosburgh, jay.vosburgh@canonical.com

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v4] net:bonding:Add support for IPV6 RLB to balance-alb mode
  2022-03-17  8:11 ` Jiri Pirko
@ 2022-03-18  9:49   ` 孙守鑫
  2022-03-18 11:34     ` Jiri Pirko
  0 siblings, 1 reply; 10+ messages in thread
From: 孙守鑫 @ 2022-03-18  9:49 UTC (permalink / raw)
  To: Jiri Pirko
  Cc: j.vosburgh, vfalico, andy, davem, kuba, yoshfuji, dsahern,
	oliver, netdev, linux-kernel, huyd12


在 2022/3/17 16:11, Jiri Pirko 写道:
> Thu, Mar 17, 2022 at 07:15:21AM CET, sunshouxin@chinatelecom.cn wrote:
>> This patch is implementing IPV6 RLB for balance-alb mode.
>>
>> Suggested-by: Hu Yadi <huyd12@chinatelecom.cn>
>> Signed-off-by: Sun Shouxin <sunshouxin@chinatelecom.cn>
>
> Could you please reply to my question I asked for v1:
> Out of curiosity, what is exactly your usecase? I'm asking because
> I don't see any good reason to use RLB/ALB modes. I have to be missing
> something.
>
> This is adding a lot of code in bonding that needs to be maintained.
> However, if there is no particular need to add it, why would we?
>
> Could you please spell out why exactly do you need this? I'm pretty sure
> that in the end well find out, that you really don't need this at all.
>
> Thanks!


This patch certainly aims to fix a real issue in our lab.
For historical reasons, bond6 with IPv4 is widely used in our lab.
We started to support IPv6 for all services last year. Our network
operations and maintenance team took it for granted, based on bond6's
specification, that the ALB capability would also work with IPv6,
but in the end it does not. As you know, it is impossible for us to
change the link networking to LACP because of the huge cost and the
effect on online servers.



^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v4] net:bonding:Add support for IPV6 RLB to balance-alb mode
  2022-03-17 18:49 ` David Ahern
  2022-03-17 20:10   ` Jay Vosburgh
@ 2022-03-18  9:50   ` 孙守鑫
  1 sibling, 0 replies; 10+ messages in thread
From: 孙守鑫 @ 2022-03-18  9:50 UTC (permalink / raw)
  To: David Ahern, j.vosburgh, vfalico, andy, davem, kuba, yoshfuji, oliver
  Cc: netdev, linux-kernel, huyd12


在 2022/3/18 2:49, David Ahern 写道:
> On 3/17/22 12:15 AM, Sun Shouxin wrote:
>> This patch is implementing IPV6 RLB for balance-alb mode.
>>
>> Suggested-by: Hu Yadi <huyd12@chinatelecom.cn>
>> Signed-off-by: Sun Shouxin <sunshouxin@chinatelecom.cn>
>> ---
>> changelog:
>> v1-->v2:
>> -Remove ndisc_bond_send_na and refactor ndisc_send_na.
>> -In rlb_nd_xmit, if the lladdr is not local, return curr_active_slave.
>> -Don't send neighbor advertisement message when receiving
>>   neighbor advertisement message in rlb6_update_entry_from_na.
>>
>> v2-->v3:
>> -Don't export ndisc_send_na.
>> -Use ipv6_stub->ndisc_send_na to replace ndisc_send_na
>>   in rlb6_update_client.
>>
>> v3-->v4:
>> -Submit all code at a whole patch.
> you misunderstood Jakub's comment. The code should evolve with small,
> focused patches and each patch needs to compile and function correctly
> (ie., no breakage).
>
> You need to respond to Jiri's question about why this feature is needed.
> After that:
>
> 1. patch 1 adds void *data to ndisc_send_na stub function and
> ndisc_send_na direct function. Update all places that use both
> ndisc_send_na to pass NULL as the data parameter.
>
> 2. patch 2 refactors ndisc_send_na to handle the new data argument
>
> 3. patch 3 exports any IPv6 functions. explain why each needs to be
> exported.
>
> 4. patch 4 .... bonding changes. (bonding folks can respond on how to
> introduce that change).


Thank you for the kind guidance to a newbie; I'll resend soon.
Thanks again.



^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v4] net:bonding:Add support for IPV6 RLB to balance-alb mode
  2022-03-17 20:10   ` Jay Vosburgh
@ 2022-03-18  9:53     ` 孙守鑫
  0 siblings, 0 replies; 10+ messages in thread
From: 孙守鑫 @ 2022-03-18  9:53 UTC (permalink / raw)
  To: Jay Vosburgh, David Ahern
  Cc: vfalico, andy, davem, kuba, yoshfuji, oliver, netdev,
	linux-kernel, huyd12


在 2022/3/18 4:10, Jay Vosburgh 写道:
> David Ahern <dsahern@kernel.org> wrote:
>
>> On 3/17/22 12:15 AM, Sun Shouxin wrote:
>>> This patch is implementing IPV6 RLB for balance-alb mode.
>>>
>>> Suggested-by: Hu Yadi <huyd12@chinatelecom.cn>
>>> Signed-off-by: Sun Shouxin <sunshouxin@chinatelecom.cn>
>>> ---
>>> changelog:
>>> v1-->v2:
>>> -Remove ndisc_bond_send_na and refactor ndisc_send_na.
>>> -In rlb_nd_xmit, if the lladdr is not local, return curr_active_slave.
>>> -Don't send neighbor advertisement message when receiving
>>>   neighbor advertisement message in rlb6_update_entry_from_na.
>>>
>>> v2-->v3:
>>> -Don't export ndisc_send_na.
>>> -Use ipv6_stub->ndisc_send_na to replace ndisc_send_na
>>>   in rlb6_update_client.
>>>
>>> v3-->v4:
>>> -Submit all code at a whole patch.
>> you misunderstood Jakub's comment. The code should evolve with small,
>> focused patches and each patch needs to compile and function correctly
>> (ie., no breakage).
> 	Agreed; the split of the patches was not at issue, it was that
> each patch in a series must compile and the built kernel must function
> rationally.
>
>> You need to respond to Jiri's question about why this feature is needed.
> 	I'm not entirely sold on adding IPv6 RLB for balance-alb, but
> the IPv4 version of it does see moderate levels of use, even now.  It's
> less common than LACP by far, though.  I'd like to know why someone
> would choose IPv6 RLB over LACP.  I wonder if this is a checklist item
> somewhere that something must have "complete support for IPv6" or words
> to that effect, versus an actual functional need.


This patch certainly aims to fix a real issue in our lab.
For historical reasons, bond6 with IPv4 is widely used in our lab.
We started to support IPv6 for all services last year. Our network
operations and maintenance team took it for granted, based on bond6's
specification, that the ALB capability would also work with IPv6,
but in the end it does not. As you know, it is impossible for us to
change the link networking to LACP because of the huge cost and the
effect on online servers.
I believe others will run into the same situation as IPv6 adoption
progresses.


>> After that:
>>
>> 1. patch 1 adds void *data to ndisc_send_na stub function and
>> ndisc_send_na direct function. Update all places that use both
>> ndisc_send_na to pass NULL as the data parameter.
>>
>> 2. patch 2 refactors ndisc_send_na to handle the new data argument
>>
>> 3. patch 3 exports any IPv6 functions. explain why each needs to be
>> exported.
>>
>> 4. patch 4 .... bonding changes. (bonding folks can respond on how to
>> introduce that change).
> 	Looking at the previous patch for bonding, my two initial
> requests are:
>
> 	1) A more detailed commit message.  The only way to understand
> how any of this actually works is reading the code, there is no higher
> level description.
>
> 	2) How does this interact with the IPv4 RLB logic?  Is it
> possible for a given bond interface MAC to be "assigned" to two
> different peers (one IPv4, one IPv6), and if so, does that behave in an
> expected manner?  I.e., two peers on the network could receive
> contradictory information via ARP and ND for the MAC address of a given
> peer.  This is already possible with the IPv4 RLB, but with an
> additional IPv6 RLB, a single peer could see two different MACs for a
> given host (one via IPv4, one via IPv6), and another peer could see the
> opposite, or even disjoint information across several peers.
>
> 	-J


Sorry, I did not fully understand your question.

If I understand correctly, I don't think IPv6 ALB interacts with the
IPv4 RLB logic. They use different neighbor tables when sending packets;
what's more, in the ALB processing, rx6_hashtbl is used for IPv6 and
rx_hashtbl for IPv4.

Please correct me if I have missed your point.


>
> ---
> 	-Jay Vosburgh, jay.vosburgh@canonical.com

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v4] net:bonding:Add support for IPV6 RLB to balance-alb mode
  2022-03-18  9:49   ` 孙守鑫
@ 2022-03-18 11:34     ` Jiri Pirko
  2022-03-21  1:17       ` 孙守鑫
  0 siblings, 1 reply; 10+ messages in thread
From: Jiri Pirko @ 2022-03-18 11:34 UTC (permalink / raw)
  To: 孙守鑫
  Cc: j.vosburgh, vfalico, andy, davem, kuba, yoshfuji, dsahern,
	oliver, netdev, linux-kernel, huyd12

Fri, Mar 18, 2022 at 10:49:02AM CET, sunshouxin@chinatelecom.cn wrote:
>
>在 2022/3/17 16:11, Jiri Pirko 写道:
>> Thu, Mar 17, 2022 at 07:15:21AM CET, sunshouxin@chinatelecom.cn wrote:
>> > This patch is implementing IPV6 RLB for balance-alb mode.
>> > 
>> > Suggested-by: Hu Yadi <huyd12@chinatelecom.cn>
>> > Signed-off-by: Sun Shouxin <sunshouxin@chinatelecom.cn>
>> 
>> Could you please reply to my question I asked for v1:
>> Out of curiosity, what is exactly your usecase? I'm asking because
>> I don't see any good reason to use RLB/ALB modes. I have to be missing
>> something.
>> 
>> This is adding a lot of code in bonding that needs to be maintained.
>> However, if there is no particular need to add it, why would we?
>> 
>> Could you please spell out why exactly do you need this? I'm pretty sure
>> that in the end well find out, that you really don't need this at all.
>> 
>> Thanks!
>
>
>This patch is certainly aim fix one real issue in ou lab.
>For historical inheritance, the bond6 with ipv4 is widely used in our lab.
>We started to support ipv6 for all service last year, networking operation
>and maintenance team
>think it does work with ipv6 ALB capacity take it for granted due to bond6's
>specification
>but it doesn't work in the end. as you know, it is impossible to change link
>neworking to LACP
>because of huge cost and effective to online server.

I don't follow. Why exactly can't you use LACP? Every switch supports
it.

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v4] net:bonding:Add support for IPV6 RLB to balance-alb mode
  2022-03-18 11:34     ` Jiri Pirko
@ 2022-03-21  1:17       ` 孙守鑫
  2022-03-21  9:52         ` Jiri Pirko
  0 siblings, 1 reply; 10+ messages in thread
From: 孙守鑫 @ 2022-03-21  1:17 UTC (permalink / raw)
  To: Jiri Pirko
  Cc: j.vosburgh, vfalico, andy, davem, kuba, yoshfuji, dsahern,
	oliver, netdev, linux-kernel, huyd12


在 2022/3/18 19:34, Jiri Pirko 写道:
> Fri, Mar 18, 2022 at 10:49:02AM CET, sunshouxin@chinatelecom.cn wrote:
>> 在 2022/3/17 16:11, Jiri Pirko 写道:
>>> Thu, Mar 17, 2022 at 07:15:21AM CET, sunshouxin@chinatelecom.cn wrote:
>>>> This patch is implementing IPV6 RLB for balance-alb mode.
>>>>
>>>> Suggested-by: Hu Yadi <huyd12@chinatelecom.cn>
>>>> Signed-off-by: Sun Shouxin <sunshouxin@chinatelecom.cn>
>>> Could you please reply to my question I asked for v1:
>>> Out of curiosity, what is exactly your usecase? I'm asking because
>>> I don't see any good reason to use RLB/ALB modes. I have to be missing
>>> something.
>>>
>>> This is adding a lot of code in bonding that needs to be maintained.
>>> However, if there is no particular need to add it, why would we?
>>>
>>> Could you please spell out why exactly do you need this? I'm pretty sure
>>> that in the end well find out, that you really don't need this at all.
>>>
>>> Thanks!
>>
>> This patch is certainly aim fix one real issue in ou lab.
>> For historical inheritance, the bond6 with ipv4 is widely used in our lab.
>> We started to support ipv6 for all service last year, networking operation
>> and maintenance team
>> think it does work with ipv6 ALB capacity take it for granted due to bond6's
>> specification
>> but it doesn't work in the end. as you know, it is impossible to change link
>> neworking to LACP
>> because of huge cost and effective to online server.
> I don't follow. Why exactly can't you use LACP? Every switch supports
> it.


Hi jiri


Changing to LACP means risk to our online services, which require high availability.

Also, we have bond6 deployed in multiple DCs, so changing it would be hugely costly.



^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v4] net:bonding:Add support for IPV6 RLB to balance-alb mode
  2022-03-21  1:17       ` 孙守鑫
@ 2022-03-21  9:52         ` Jiri Pirko
  0 siblings, 0 replies; 10+ messages in thread
From: Jiri Pirko @ 2022-03-21  9:52 UTC (permalink / raw)
  To: 孙守鑫
  Cc: j.vosburgh, vfalico, andy, davem, kuba, yoshfuji, dsahern,
	oliver, netdev, linux-kernel, huyd12

Mon, Mar 21, 2022 at 02:17:34AM CET, sunshouxin@chinatelecom.cn wrote:
>
>在 2022/3/18 19:34, Jiri Pirko 写道:
>> Fri, Mar 18, 2022 at 10:49:02AM CET, sunshouxin@chinatelecom.cn wrote:
>> > 在 2022/3/17 16:11, Jiri Pirko 写道:
>> > > Thu, Mar 17, 2022 at 07:15:21AM CET, sunshouxin@chinatelecom.cn wrote:
>> > > > This patch is implementing IPV6 RLB for balance-alb mode.
>> > > > 
>> > > > Suggested-by: Hu Yadi <huyd12@chinatelecom.cn>
>> > > > Signed-off-by: Sun Shouxin <sunshouxin@chinatelecom.cn>
>> > > Could you please reply to my question I asked for v1:
>> > > Out of curiosity, what is exactly your usecase? I'm asking because
>> > > I don't see any good reason to use RLB/ALB modes. I have to be missing
>> > > something.
>> > > 
>> > > This is adding a lot of code in bonding that needs to be maintained.
>> > > However, if there is no particular need to add it, why would we?
>> > > 
>> > > Could you please spell out why exactly do you need this? I'm pretty sure
>> > > that in the end well find out, that you really don't need this at all.
>> > > 
>> > > Thanks!
>> > 
>> > This patch is certainly aim fix one real issue in ou lab.
>> > For historical inheritance, the bond6 with ipv4 is widely used in our lab.
>> > We started to support ipv6 for all service last year, networking operation
>> > and maintenance team
>> > think it does work with ipv6 ALB capacity take it for granted due to bond6's
>> > specification
>> > but it doesn't work in the end. as you know, it is impossible to change link
>> > neworking to LACP
>> > because of huge cost and effective to online server.
>> I don't follow. Why exactly can't you use LACP? Every switch supports
>> it.
>
>
>Hi jiri
>
>
>Changing to Lacp means risk  to our online service requring high available.
>
>Also,we have multiple DCs installed bond6,it is huge cost to change it.

So? This is not an argument in this discussion. I believe that adding this
amount of code to bonding for a use case that could simply be replaced by
LACP is wrong and we should not do that. The original ALB/RLB
implementation was done when LACP was not that widely used. But now it
is 2022 - different story.

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2022-03-21  9:52 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-03-17  6:15 [PATCH v4] net:bonding:Add support for IPV6 RLB to balance-alb mode Sun Shouxin
2022-03-17  8:11 ` Jiri Pirko
2022-03-18  9:49   ` 孙守鑫
2022-03-18 11:34     ` Jiri Pirko
2022-03-21  1:17       ` 孙守鑫
2022-03-21  9:52         ` Jiri Pirko
2022-03-17 18:49 ` David Ahern
2022-03-17 20:10   ` Jay Vosburgh
2022-03-18  9:53     ` 孙守鑫
2022-03-18  9:50   ` 孙守鑫

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.