netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: liaichun <liaichun@huawei.com>
To: Jay Vosburgh <jay.vosburgh@canonical.com>
Cc: liaichun <liaichun@huawei.com>,
	"davem@davemloft.net" <davem@davemloft.net>,
	"kuba@kernel.org" <kuba@kernel.org>,
	"vfalico@gmail.com" <vfalico@gmail.com>,
	"andy@greyhouse.net" <andy@greyhouse.net>,
	"netdev@vger.kernel.org" <netdev@vger.kernel.org>,
	"Chenxiang (EulerOS)" <rose.chen@huawei.com>,
	moyufeng <moyufeng@huawei.com>
Subject: Re: [PATCH net v2]bonding: check port and aggregator when select
Date: Mon, 24 May 2021 15:21:19 +0000	[thread overview]
Message-ID: <87497cc383e249538c692e61f486e8fe@huawei.com> (raw)

Hi Jay:

	I repeated the "did not find a suitable aggregator" issue by repeatedly hot-plugging the NIC device.

	https://lore.kernel.org/netdev/52630cba-cc60-a024-8dd0-8319e5245044@huawei.com/
	Add the hot swap operation of network adapters:
		while true; do
        	ETS_SSHCMD "echo 1 > /sys/bus/pci/devices/ enp3s0 /remove" &
        	ETS_SSHCMD "echo 1 > /sys/bus/pci/devices/ enp4s0 /remove" &
        	ETS_SSHCMD "echo 1 > /sys/bus/pci/devices/ enp5s0 /remove" &
        	ETS_SSHCMD "echo 1 > /sys/bus/pci/devices/ enp6s0 /remove" &
        	sleep 10
        	ETS_SSHCMD "echo 1 > /sys/bus/pci/rescan"
			sleep 100
		done &

	The numbers of port_params updated, and then can not find a suitable aggregator.

The detailed code analysis is as follows:
bond_3ad_lacpdu_recv ->bond_3ad_rx_indication-> ad_rx_machine-> __record_pdu
static void __record_pdu(struct lacpdu *lacpdu, struct port *port)
{
	if (lacpdu && port) {
		struct port_params *partner = &port->partner_oper;

		__choose_matched(lacpdu, port);
		/* record the new parameter values for the partner
		 * operational
		 */
		partner->port_number = ntohs(lacpdu->actor_port);
		partner->port_priority = ntohs(lacpdu->actor_port_priority);
		partner->system = lacpdu->actor_system;
		partner->system_priority = ntohs(lacpdu->actor_system_priority);
		partner->key = ntohs(lacpdu->actor_key);
		partner->port_state = lacpdu->actor_state;
--------analysis: Update the member of the partner here.

		/* set actor_oper_port_state.defaulted to FALSE */
		port->actor_oper_port_state &= ~AD_STATE_DEFAULTED;

		/* set the partner sync. to on if the partner is sync,
		 * and the port is matched
		 */
		if ((port->sm_vars & AD_PORT_MATCHED) &&
		    (lacpdu->actor_state & AD_STATE_SYNCHRONIZATION)) {
			partner->port_state |= AD_STATE_SYNCHRONIZATION;
			pr_debug("%s partner sync=1\n", port->slave->dev->name);
		} else {
			partner->port_state &= ~AD_STATE_SYNCHRONIZATION;
			pr_debug("%s partner sync=0\n", port->slave->dev->name);
		}
	}
}

static void ad_port_selection_logic(struct port *port, bool *update_slave_arr)
{
[...]
	/* search on all aggregators for a suitable aggregator for this port */
	bond_for_each_slave(bond, slave, iter) {
		aggregator = &(SLAVE_AD_INFO(slave)->aggregator);

		/* keep a free aggregator for later use(if needed) */
		if (!aggregator->lag_ports) {
			if (!free_aggregator)
				free_aggregator = aggregator;
			continue;
		}
		/* check if current aggregator suits us */

----analysis: here check the number of aggregator's partner, but it was updated by bond_3ad_lacpdu_recv

		if (((aggregator->actor_oper_aggregator_key == port->actor_oper_port_key) && /* if all parameters match AND */
		     MAC_ADDRESS_EQUAL(&(aggregator->partner_system), &(port->partner_oper.system)) &&
		     (aggregator->partner_system_priority == port->partner_oper.system_priority) &&
		     (aggregator->partner_oper_aggregator_key == port->partner_oper.key)
		    ) &&
		    ((!MAC_ADDRESS_EQUAL(&(port->partner_oper.system), &(null_mac_addr)) && /* partner answers */
		      !aggregator->is_individual)  /* but is not individual OR */
		    )
		   ) {
			/* attach to the founded aggregator */
			port->aggregator = aggregator;
			port->actor_port_aggregator_identifier =
				port->aggregator->aggregator_identifier;
			port->next_port_in_aggregator = aggregator->lag_ports;
			port->aggregator->num_of_ports++;
			aggregator->lag_ports = port;
			netdev_dbg(bond->dev, "Port %d joined LAG %d(existing LAG)\n",
				   port->actor_port_number,
				   port->aggregator->aggregator_identifier);

			/* mark this port as selected */
			port->sm_vars |= AD_PORT_SELECTED;
			found = 1;
----analysis: found is 0
			break;
		}
	}
	/* the port couldn't find an aggregator - attach it to a new
	 * aggregator
	 */
	if (!found) {
----analysis: No proper free_aggregator is found.
		if (free_aggregator) {     
			/* assign port a new aggregator */
			port->aggregator = free_aggregator;
			port->actor_port_aggregator_identifier =
				port->aggregator->aggregator_identifier;

			/* update the new aggregator's parameters
			 * if port was responsed from the end-user
			 */
			if (port->actor_oper_port_key & AD_DUPLEX_KEY_MASKS)
				/* if port is full duplex */
				port->aggregator->is_individual = false;
			else
				port->aggregator->is_individual = true;

			port->aggregator->actor_admin_aggregator_key =
				port->actor_admin_port_key;
			port->aggregator->actor_oper_aggregator_key =
				port->actor_oper_port_key;
			port->aggregator->partner_system =
				port->partner_oper.system;
			port->aggregator->partner_system_priority =
				port->partner_oper.system_priority;
			port->aggregator->partner_oper_aggregator_key = port->partner_oper.key;
			port->aggregator->receive_state = 1;
			port->aggregator->transmit_state = 1;
			port->aggregator->lag_ports = port;
			port->aggregator->num_of_ports++;

			/* mark this port as selected */
			port->sm_vars |= AD_PORT_SELECTED;

			netdev_dbg(bond->dev, "Port %d joined LAG %d(new LAG)\n",
				   port->actor_port_number,
				   port->aggregator->aggregator_identifier);
		} else {
----analysis: The fault scenario goes to this branch, and port->aggregator remains NULL.
			netdev_err(bond->dev, "Port %d (on %s) did not find a suitable aggregator\n",
			       port->actor_port_number, port->slave->dev->name);
		}
	}
	/* if all aggregator's ports are READY_N == TRUE, set ready=TRUE
	 * in all aggregator's ports, else set ready=FALSE in all
	 * aggregator's ports
	 */
	__set_agg_ports_ready(port->aggregator,
			      __agg_ports_are_ready(port->aggregator));

----analysis: port->aggregator is still NULL, which causes problem.

 [...]
}

 In this case ,I want to use old aggregator, but init it's number.And I don't know if that's correct.

diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c
index cfb0060fc..bd66f3f48 100644
--- a/drivers/net/bonding/bond_3ad.c
+++ b/drivers/net/bonding/bond_3ad.c
@@ -1450,7 +1450,6 @@ static void ad_port_selection_logic(struct port *port, bool *update_slave_arr)
                                /* clear the port's relations to this
                                 * aggregator
                                 */
-                               port->aggregator = NULL;
                                port->next_port_in_aggregator = NULL;
                                port->actor_port_aggregator_identifier = 0;

@@ -1521,43 +1520,47 @@ static void ad_port_selection_logic(struct port *port, bool *update_slave_arr)
                if (free_aggregator) {
                        /* assign port a new aggregator */
                        port->aggregator = free_aggregator;
-                       port->actor_port_aggregator_identifier =
-                               port->aggregator->aggregator_identifier;
-
-                       /* update the new aggregator's parameters
-                        * if port was responsed from the end-user
-                        */
-                       if (port->actor_oper_port_key & AD_DUPLEX_KEY_MASKS)
-                               /* if port is full duplex */
-                               port->aggregator->is_individual = false;
-                       else
-                               port->aggregator->is_individual = true;
-
-                       port->aggregator->actor_admin_aggregator_key =
-                               port->actor_admin_port_key;
-                       port->aggregator->actor_oper_aggregator_key =
-                               port->actor_oper_port_key;
-                       port->aggregator->partner_system =
-                               port->partner_oper.system;
-                       port->aggregator->partner_system_priority =
-                               port->partner_oper.system_priority;
-                       port->aggregator->partner_oper_aggregator_key = port->partner_oper.key;
-                       port->aggregator->receive_state = 1;
-                       port->aggregator->transmit_state = 1;
-                       port->aggregator->lag_ports = port;
-                       port->aggregator->num_of_ports++;
-
-                       /* mark this port as selected */
-                       port->sm_vars |= AD_PORT_SELECTED;
-
-                       netdev_dbg(bond->dev, "Port %d joined LAG %d(new LAG)\n",
-                                  port->actor_port_number,
-                                  port->aggregator->aggregator_identifier);
                } else {
+                       /* use old aggregator */
                        netdev_err(bond->dev, "Port %d (on %s) did not find a suitable aggregator\n",
-                              port->actor_port_number, port->slave->dev->name);
-                       return;
+                                  port->actor_port_number, port->slave->dev->name);
+                       if (port->aggregator == NULL) {
+                               netdev_err(bond->dev, "old aggregator is still NULL\n");
+                               return;
+                       }
                }
+               port->actor_port_aggregator_identifier =
+                       port->aggregator->aggregator_identifier;
+
+               /* update the new aggregator's parameters
+                * if port was responsed from the end-user
+                */
+               if (port->actor_oper_port_key & AD_DUPLEX_KEY_MASKS)
+                       /* if port is full duplex */
+                       port->aggregator->is_individual = false;
+               else
+                       port->aggregator->is_individual = true;
+
+               port->aggregator->actor_admin_aggregator_key =
+                       port->actor_admin_port_key;
+               port->aggregator->actor_oper_aggregator_key =
+                       port->actor_oper_port_key;
+               port->aggregator->partner_system =
+                       port->partner_oper.system;
+               port->aggregator->partner_system_priority =
+                       port->partner_oper.system_priority;
+               port->aggregator->partner_oper_aggregator_key = port->partner_oper.key;
+               port->aggregator->receive_state = 1;
+               port->aggregator->transmit_state = 1;
+               port->aggregator->lag_ports = port;
+               port->aggregator->num_of_ports++;
+
+               /* mark this port as selected */
+               port->sm_vars |= AD_PORT_SELECTED;
+
+               netdev_dbg(bond->dev, "Port %d joined LAG %d(new LAG)\n",
+                          port->actor_port_number,
+                          port->aggregator->aggregator_identifier);
        }
        /* if all aggregator's ports are READY_N == TRUE, set ready=TRUE
         * in all aggregator's ports, else set ready=FALSE in all

Thanks.

             reply	other threads:[~2021-05-24 15:23 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-05-24 15:21 liaichun [this message]
  -- strict thread matches above, loose matches on Subject: below --
2021-04-15 11:58 [PATCH net v2]bonding: check port and aggregator when select liaichun
2021-02-03  1:54 liaichun
2021-01-28  8:20 Aichun Li
2021-02-01 21:52 ` Jakub Kicinski
2021-02-02 20:02 ` Jay Vosburgh

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=87497cc383e249538c692e61f486e8fe@huawei.com \
    --to=liaichun@huawei.com \
    --cc=andy@greyhouse.net \
    --cc=davem@davemloft.net \
    --cc=jay.vosburgh@canonical.com \
    --cc=kuba@kernel.org \
    --cc=moyufeng@huawei.com \
    --cc=netdev@vger.kernel.org \
    --cc=rose.chen@huawei.com \
    --cc=vfalico@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).