All of lore.kernel.org
 help / color / mirror / Atom feed
From: Mateusz Polchlopek <mateusz.polchlopek@intel.com>
To: <intel-wired-lan@lists.osuosl.org>
Cc: netdev@vger.kernel.org,
	Michal Wilczynski <michal.wilczynski@intel.com>,
	Raj Victor <victor.raj@intel.com>,
	horms@kernel.org, przemyslaw.kitszel@intel.com
Subject: Re: [Intel-wired-lan] [PATCH iwl-next v1 1/5] ice: Support 5 layer topology
Date: Mon, 19 Feb 2024 11:16:42 +0100	[thread overview]
Message-ID: <d5ccaa1e-459e-4ac4-8c70-cfec02d62d11@intel.com> (raw)
In-Reply-To: <20240219100555.7220-2-mateusz.polchlopek@intel.com>

On 2/19/2024 11:05 AM, Mateusz Polchlopek wrote:
> From: Raj Victor <victor.raj@intel.com>
> 
> There is a performance issue when the number of VSIs is not a multiple
> of 8. This is caused by the max children limitation per node (8) in the
> 9 layer topology. The BW credits are shared evenly among the children
> by default. Assume one node has 8 children and the other has 1.
> The parent of these nodes shares the BW credits equally among them.
> Apparently this causes a problem for the first node which has 8 children.
> The 9th VM gets more BW credits than the first 8 VMs.
> 
> Example:
> 
> 1) With 8 VM's:
> for x in 0 1 2 3 4 5 6 7;
> do taskset -c ${x} netperf -P0 -H 172.68.169.125 &  sleep .1 ; done
> 
> tx_queue_0_packets: 23283027
> tx_queue_1_packets: 23292289
> tx_queue_2_packets: 23276136
> tx_queue_3_packets: 23279828
> tx_queue_4_packets: 23279828
> tx_queue_5_packets: 23279333
> tx_queue_6_packets: 23277745
> tx_queue_7_packets: 23279950
> tx_queue_8_packets: 0
> 
> 2) With 9 VM's:
> for x in 0 1 2 3 4 5 6 7 8;
> do taskset -c ${x} netperf -P0 -H 172.68.169.125 &  sleep .1 ; done
> 
> tx_queue_0_packets: 24163396
> tx_queue_1_packets: 24164623
> tx_queue_2_packets: 24163188
> tx_queue_3_packets: 24163701
> tx_queue_4_packets: 24163683
> tx_queue_5_packets: 24164668
> tx_queue_6_packets: 23327200
> tx_queue_7_packets: 24163853
> tx_queue_8_packets: 91101417
> 
> So on average queue 8 statistics show that 3.7 times more packets were
> sent there than to the other queues.
> 
> Starting with version 3.20, the FW has increased the max number of
> children per node by reducing the number of layers from 9 to 5. Reflect
> this on the driver side.
> 
> Signed-off-by: Raj Victor <victor.raj@intel.com>
> Co-developed-by: Michal Wilczynski <michal.wilczynski@intel.com>
> Signed-off-by: Michal Wilczynski <michal.wilczynski@intel.com>
> Signed-off-by: Mateusz Polchlopek <mateusz.polchlopek@intel.com>
> ---
>   .../net/ethernet/intel/ice/ice_adminq_cmd.h   |  23 ++
>   drivers/net/ethernet/intel/ice/ice_common.c   |   5 +
>   drivers/net/ethernet/intel/ice/ice_ddp.c      | 199 ++++++++++++++++++
>   drivers/net/ethernet/intel/ice/ice_ddp.h      |   2 +
>   drivers/net/ethernet/intel/ice/ice_sched.h    |   3 +
>   drivers/net/ethernet/intel/ice/ice_type.h     |   1 +
>   6 files changed, 233 insertions(+)
> 
> diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
> index b315c734455a..02102e937b30 100644
> --- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
> +++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
> @@ -121,6 +121,7 @@ struct ice_aqc_list_caps_elem {
>   #define ICE_AQC_CAPS_PCIE_RESET_AVOIDANCE		0x0076
>   #define ICE_AQC_CAPS_POST_UPDATE_RESET_RESTRICT		0x0077
>   #define ICE_AQC_CAPS_NVM_MGMT				0x0080
> +#define ICE_AQC_CAPS_TX_SCHED_TOPO_COMP_MODE		0x0085
>   #define ICE_AQC_CAPS_FW_LAG_SUPPORT			0x0092
>   #define ICE_AQC_BIT_ROCEV2_LAG				0x01
>   #define ICE_AQC_BIT_SRIOV_LAG				0x02
> @@ -819,6 +820,23 @@ struct ice_aqc_get_topo {
>   	__le32 addr_low;
>   };
>   
> +/* Get/Set Tx Topology (indirect 0x0418/0x0417) */
> +struct ice_aqc_get_set_tx_topo {
> +	u8 set_flags;
> +#define ICE_AQC_TX_TOPO_FLAGS_CORRER		BIT(0)
> +#define ICE_AQC_TX_TOPO_FLAGS_SRC_RAM		BIT(1)
> +#define ICE_AQC_TX_TOPO_FLAGS_LOAD_NEW		BIT(4)
> +#define ICE_AQC_TX_TOPO_FLAGS_ISSUED		BIT(5)
> +
> +	u8 get_flags;
> +#define ICE_AQC_TX_TOPO_GET_RAM		2
> +
> +	__le16 reserved1;
> +	__le32 reserved2;
> +	__le32 addr_high;
> +	__le32 addr_low;
> +};
> +
>   /* Update TSE (indirect 0x0403)
>    * Get TSE (indirect 0x0404)
>    * Add TSE (indirect 0x0401)
> @@ -2547,6 +2565,7 @@ struct ice_aq_desc {
>   		struct ice_aqc_get_link_topo get_link_topo;
>   		struct ice_aqc_i2c read_write_i2c;
>   		struct ice_aqc_read_i2c_resp read_i2c_resp;
> +		struct ice_aqc_get_set_tx_topo get_set_tx_topo;
>   	} params;
>   };
>   
> @@ -2653,6 +2672,10 @@ enum ice_adminq_opc {
>   	ice_aqc_opc_query_sched_res			= 0x0412,
>   	ice_aqc_opc_remove_rl_profiles			= 0x0415,
>   
> +	/* tx topology commands */
> +	ice_aqc_opc_set_tx_topo				= 0x0417,
> +	ice_aqc_opc_get_tx_topo				= 0x0418,
> +
>   	/* PHY commands */
>   	ice_aqc_opc_get_phy_caps			= 0x0600,
>   	ice_aqc_opc_set_phy_cfg				= 0x0601,
> diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c
> index 090a2b8b5ff2..175091011251 100644
> --- a/drivers/net/ethernet/intel/ice/ice_common.c
> +++ b/drivers/net/ethernet/intel/ice/ice_common.c
> @@ -1622,6 +1622,8 @@ ice_aq_send_cmd(struct ice_hw *hw, struct ice_aq_desc *desc, void *buf,
>   	case ice_aqc_opc_set_port_params:
>   	case ice_aqc_opc_get_vlan_mode_parameters:
>   	case ice_aqc_opc_set_vlan_mode_parameters:
> +	case ice_aqc_opc_set_tx_topo:
> +	case ice_aqc_opc_get_tx_topo:
>   	case ice_aqc_opc_add_recipe:
>   	case ice_aqc_opc_recipe_to_profile:
>   	case ice_aqc_opc_get_recipe:
> @@ -2178,6 +2180,9 @@ ice_parse_common_caps(struct ice_hw *hw, struct ice_hw_common_caps *caps,
>   		ice_debug(hw, ICE_DBG_INIT, "%s: sriov_lag = %u\n",
>   			  prefix, caps->sriov_lag);
>   		break;
> +	case ICE_AQC_CAPS_TX_SCHED_TOPO_COMP_MODE:
> +		caps->tx_sched_topo_comp_mode_en = (number == 1);
> +		break;
>   	default:
>   		/* Not one of the recognized common capabilities */
>   		found = false;
> diff --git a/drivers/net/ethernet/intel/ice/ice_ddp.c b/drivers/net/ethernet/intel/ice/ice_ddp.c
> index 7532d11ad7f3..766437944774 100644
> --- a/drivers/net/ethernet/intel/ice/ice_ddp.c
> +++ b/drivers/net/ethernet/intel/ice/ice_ddp.c
> @@ -4,6 +4,7 @@
>   #include "ice_common.h"
>   #include "ice.h"
>   #include "ice_ddp.h"
> +#include "ice_sched.h"
>   
>   /* For supporting double VLAN mode, it is necessary to enable or disable certain
>    * boost tcam entries. The metadata labels names that match the following
> @@ -2263,3 +2264,201 @@ enum ice_ddp_state ice_copy_and_init_pkg(struct ice_hw *hw, const u8 *buf,
>   
>   	return state;
>   }
> +
> +/**
> + * ice_get_set_tx_topo - get or set Tx topology
> + * @hw: pointer to the HW struct
> + * @buf: pointer to Tx topology buffer
> + * @buf_size: buffer size
> + * @cd: pointer to command details structure or NULL
> + * @flags: pointer to descriptor flags
> + * @set: 0-get, 1-set topology
> + *
> + * The function will get or set Tx topology
> + */
> +static int
> +ice_get_set_tx_topo(struct ice_hw *hw, u8 *buf, u16 buf_size,
> +		    struct ice_sq_cd *cd, u8 *flags, bool set)
> +{
> +	struct ice_aqc_get_set_tx_topo *cmd;
> +	struct ice_aq_desc desc;
> +	int status;
> +
> +	cmd = &desc.params.get_set_tx_topo;
> +	if (set) {
> +		ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_set_tx_topo);
> +		cmd->set_flags = ICE_AQC_TX_TOPO_FLAGS_ISSUED;
> +		/* requested to update a new topology, not a default topology */
> +		if (buf)
> +			cmd->set_flags |= ICE_AQC_TX_TOPO_FLAGS_SRC_RAM |
> +					  ICE_AQC_TX_TOPO_FLAGS_LOAD_NEW;
> +	} else {
> +		ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_tx_topo);
> +		cmd->get_flags = ICE_AQC_TX_TOPO_GET_RAM;
> +	}
> +	desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD);
> +	status = ice_aq_send_cmd(hw, &desc, buf, buf_size, cd);
> +	if (status)
> +		return status;
> +	/* read the return flag values (first byte) for get operation */
> +	if (!set && flags)
> +		*flags = desc.params.get_set_tx_topo.set_flags;
> +
> +	return 0;
> +}
> +
> +/**
> + * ice_cfg_tx_topo - Initialize new Tx topology if available
> + * @hw: pointer to the HW struct
> + * @buf: pointer to Tx topology buffer
> + * @len: buffer size
> + *
> + * The function will apply the new Tx topology from the package buffer
> + * if available.
> + */
> +int ice_cfg_tx_topo(struct ice_hw *hw, u8 *buf, u32 len)
> +{
> +	u8 *current_topo, *new_topo = NULL;
> +	struct ice_run_time_cfg_seg *seg;
> +	struct ice_buf_hdr *section;
> +	struct ice_pkg_hdr *pkg_hdr;
> +	enum ice_ddp_state state;
> +	u16 offset, size = 0;
> +	u32 reg = 0;
> +	int status;
> +	u8 flags;
> +
> +	if (!buf || !len)
> +		return -EINVAL;
> +
> +	/* Does FW support new Tx topology mode ? */
> +	if (!hw->func_caps.common_cap.tx_sched_topo_comp_mode_en) {
> +		ice_debug(hw, ICE_DBG_INIT, "FW doesn't support compatibility mode\n");
> +		return -EOPNOTSUPP;
> +	}
> +
> +	current_topo = kzalloc(ICE_AQ_MAX_BUF_LEN, GFP_KERNEL);
> +	if (!current_topo)
> +		return -ENOMEM;
> +
> +	/* Get the current Tx topology */
> +	status = ice_get_set_tx_topo(hw, current_topo, ICE_AQ_MAX_BUF_LEN, NULL,
> +				     &flags, false);
> +
> +	kfree(current_topo);
> +
> +	if (status) {
> +		ice_debug(hw, ICE_DBG_INIT, "Get current topology is failed\n");
> +		return status;
> +	}
> +
> +	/* Is default topology already applied ? */
> +	if (!(flags & ICE_AQC_TX_TOPO_FLAGS_LOAD_NEW) &&
> +	    hw->num_tx_sched_layers == ICE_SCHED_9_LAYERS) {
> +		ice_debug(hw, ICE_DBG_INIT, "Default topology already applied\n");
> +		return -EEXIST;
> +	}
> +
> +	/* Is new topology already applied ? */
> +	if ((flags & ICE_AQC_TX_TOPO_FLAGS_LOAD_NEW) &&
> +	    hw->num_tx_sched_layers == ICE_SCHED_5_LAYERS) {
> +		ice_debug(hw, ICE_DBG_INIT, "New topology already applied\n");
> +		return -EEXIST;
> +	}
> +
> +	/* Setting topology already issued? */
> +	if (flags & ICE_AQC_TX_TOPO_FLAGS_ISSUED) {
> +		ice_debug(hw, ICE_DBG_INIT, "Update Tx topology was done by another PF\n");
> +		/* Add a small delay before exiting */
> +		msleep(2000);
> +		return -EEXIST;
> +	}
> +
> +	/* Change the topology from new to default (5 to 9) */
> +	if (!(flags & ICE_AQC_TX_TOPO_FLAGS_LOAD_NEW) &&
> +	    hw->num_tx_sched_layers == ICE_SCHED_5_LAYERS) {
> +		ice_debug(hw, ICE_DBG_INIT, "Change topology from 5 to 9 layers\n");
> +		goto update_topo;
> +	}
> +
> +	pkg_hdr = (struct ice_pkg_hdr *)buf;
> +	state = ice_verify_pkg(pkg_hdr, len);
> +	if (state) {
> +		ice_debug(hw, ICE_DBG_INIT, "Failed to verify pkg (err: %d)\n",
> +			  state);
> +		return -EIO;
> +	}
> +
> +	/* Find runtime configuration segment */
> +	seg = (struct ice_run_time_cfg_seg *)
> +	      ice_find_seg_in_pkg(hw, SEGMENT_TYPE_ICE_RUN_TIME_CFG, pkg_hdr);
> +	if (!seg) {
> +		ice_debug(hw, ICE_DBG_INIT, "5 layer topology segment is missing\n");
> +		return -EIO;
> +	}
> +
> +	if (le32_to_cpu(seg->buf_table.buf_count) < ICE_MIN_S_COUNT) {
> +		ice_debug(hw, ICE_DBG_INIT, "5 layer topology segment count(%d) is wrong\n",
> +			  seg->buf_table.buf_count);
> +		return -EIO;
> +	}
> +
> +	section = ice_pkg_val_buf(seg->buf_table.buf_array);
> +	if (!section || le32_to_cpu(section->section_entry[0].type) !=
> +		ICE_SID_TX_5_LAYER_TOPO) {
> +		ice_debug(hw, ICE_DBG_INIT, "5 layer topology section type is wrong\n");
> +		return -EIO;
> +	}
> +
> +	size = le16_to_cpu(section->section_entry[0].size);
> +	offset = le16_to_cpu(section->section_entry[0].offset);
> +	if (size < ICE_MIN_S_SZ || size > ICE_MAX_S_SZ) {
> +		ice_debug(hw, ICE_DBG_INIT, "5 layer topology section size is wrong\n");
> +		return -EIO;
> +	}
> +
> +	/* Make sure the section fits in the buffer */
> +	if (offset + size > ICE_PKG_BUF_SIZE) {
> +		ice_debug(hw, ICE_DBG_INIT, "5 layer topology buffer > 4K\n");
> +		return -EIO;
> +	}
> +
> +	/* Get the new topology buffer */
> +	new_topo = ((u8 *)section) + offset;
> +
> +update_topo:
> +	/* Acquire global lock to make sure that set topology issued
> +	 * by one PF.
> +	 */
> +	status = ice_acquire_res(hw, ICE_GLOBAL_CFG_LOCK_RES_ID, ICE_RES_WRITE,
> +				 ICE_GLOBAL_CFG_LOCK_TIMEOUT);
> +	if (status) {
> +		ice_debug(hw, ICE_DBG_INIT, "Failed to acquire global lock\n");
> +		return status;
> +	}
> +
> +	/* Check if reset was triggered already. */
> +	reg = rd32(hw, GLGEN_RSTAT);
> +	if (reg & GLGEN_RSTAT_DEVSTATE_M) {
> +		/* Reset is in progress, re-init the HW again */
> +		ice_debug(hw, ICE_DBG_INIT, "Reset is in progress. Layer topology might be applied already\n");
> +		ice_check_reset(hw);
> +		return 0;
> +	}
> +
> +	/* Set new topology */
> +	status = ice_get_set_tx_topo(hw, new_topo, size, NULL, NULL, true);
> +	if (status) {
> +		ice_debug(hw, ICE_DBG_INIT, "Failed setting Tx topology\n");
> +		return status;
> +	}
> +
> +	/* New topology is updated, delay 1 second before issuing the CORER */
> +	msleep(1000);
> +	ice_reset(hw, ICE_RESET_CORER);
> +	/* CORER will clear the global lock, so no explicit call
> +	 * required for release.
> +	 */
> +
> +	return 0;
> +}
> diff --git a/drivers/net/ethernet/intel/ice/ice_ddp.h b/drivers/net/ethernet/intel/ice/ice_ddp.h
> index ff66c2ffb1a2..622543f08b43 100644
> --- a/drivers/net/ethernet/intel/ice/ice_ddp.h
> +++ b/drivers/net/ethernet/intel/ice/ice_ddp.h
> @@ -454,4 +454,6 @@ u16 ice_pkg_buf_get_active_sections(struct ice_buf_build *bld);
>   void *ice_pkg_enum_section(struct ice_seg *ice_seg, struct ice_pkg_enum *state,
>   			   u32 sect_type);
>   
> +int ice_cfg_tx_topo(struct ice_hw *hw, u8 *buf, u32 len);
> +
>   #endif
> diff --git a/drivers/net/ethernet/intel/ice/ice_sched.h b/drivers/net/ethernet/intel/ice/ice_sched.h
> index 1aef05ea5a57..9baff6a857d8 100644
> --- a/drivers/net/ethernet/intel/ice/ice_sched.h
> +++ b/drivers/net/ethernet/intel/ice/ice_sched.h
> @@ -6,6 +6,9 @@
>   
>   #include "ice_common.h"
>   
> +#define ICE_SCHED_5_LAYERS	5
> +#define ICE_SCHED_9_LAYERS	9
> +
>   #define SCHED_NODE_NAME_MAX_LEN 32
>   
>   #define ICE_QGRP_LAYER_OFFSET	2
> diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h
> index 657f97e2105f..f964f26664d0 100644
> --- a/drivers/net/ethernet/intel/ice/ice_type.h
> +++ b/drivers/net/ethernet/intel/ice/ice_type.h
> @@ -296,6 +296,7 @@ struct ice_hw_common_caps {
>   	bool pcie_reset_avoidance;
>   	/* Post update reset restriction */
>   	bool reset_restrict_support;
> +	bool tx_sched_topo_comp_mode_en;
>   };
>   
>   /* IEEE 1588 TIME_SYNC specific info */

This is of course v4, not v1; sorry for the mistake in the tag.
Mateusz

WARNING: multiple messages have this Message-ID (diff)
From: Mateusz Polchlopek <mateusz.polchlopek@intel.com>
To: <intel-wired-lan@lists.osuosl.org>
Cc: <netdev@vger.kernel.org>, <horms@kernel.org>,
	<przemyslaw.kitszel@intel.com>, Raj Victor <victor.raj@intel.com>,
	"Michal Wilczynski" <michal.wilczynski@intel.com>
Subject: Re: [Intel-wired-lan] [PATCH iwl-next v1 1/5] ice: Support 5 layer topology
Date: Mon, 19 Feb 2024 11:16:42 +0100	[thread overview]
Message-ID: <d5ccaa1e-459e-4ac4-8c70-cfec02d62d11@intel.com> (raw)
In-Reply-To: <20240219100555.7220-2-mateusz.polchlopek@intel.com>

On 2/19/2024 11:05 AM, Mateusz Polchlopek wrote:
> From: Raj Victor <victor.raj@intel.com>
> 
> There is a performance issue when the number of VSIs is not a multiple
> of 8. This is caused by the max children limitation per node (8) in the
> 9 layer topology. The BW credits are shared evenly among the children
> by default. Assume one node has 8 children and the other has 1.
> The parent of these nodes shares the BW credits equally among them.
> Apparently this causes a problem for the first node which has 8 children.
> The 9th VM gets more BW credits than the first 8 VMs.
> 
> Example:
> 
> 1) With 8 VM's:
> for x in 0 1 2 3 4 5 6 7;
> do taskset -c ${x} netperf -P0 -H 172.68.169.125 &  sleep .1 ; done
> 
> tx_queue_0_packets: 23283027
> tx_queue_1_packets: 23292289
> tx_queue_2_packets: 23276136
> tx_queue_3_packets: 23279828
> tx_queue_4_packets: 23279828
> tx_queue_5_packets: 23279333
> tx_queue_6_packets: 23277745
> tx_queue_7_packets: 23279950
> tx_queue_8_packets: 0
> 
> 2) With 9 VM's:
> for x in 0 1 2 3 4 5 6 7 8;
> do taskset -c ${x} netperf -P0 -H 172.68.169.125 &  sleep .1 ; done
> 
> tx_queue_0_packets: 24163396
> tx_queue_1_packets: 24164623
> tx_queue_2_packets: 24163188
> tx_queue_3_packets: 24163701
> tx_queue_4_packets: 24163683
> tx_queue_5_packets: 24164668
> tx_queue_6_packets: 23327200
> tx_queue_7_packets: 24163853
> tx_queue_8_packets: 91101417
> 
> So on average queue 8 statistics show that 3.7 times more packets were
> sent there than to the other queues.
> 
> Starting with version 3.20, the FW has increased the max number of
> children per node by reducing the number of layers from 9 to 5. Reflect
> this on the driver side.
> 
> Signed-off-by: Raj Victor <victor.raj@intel.com>
> Co-developed-by: Michal Wilczynski <michal.wilczynski@intel.com>
> Signed-off-by: Michal Wilczynski <michal.wilczynski@intel.com>
> Signed-off-by: Mateusz Polchlopek <mateusz.polchlopek@intel.com>
> ---
>   .../net/ethernet/intel/ice/ice_adminq_cmd.h   |  23 ++
>   drivers/net/ethernet/intel/ice/ice_common.c   |   5 +
>   drivers/net/ethernet/intel/ice/ice_ddp.c      | 199 ++++++++++++++++++
>   drivers/net/ethernet/intel/ice/ice_ddp.h      |   2 +
>   drivers/net/ethernet/intel/ice/ice_sched.h    |   3 +
>   drivers/net/ethernet/intel/ice/ice_type.h     |   1 +
>   6 files changed, 233 insertions(+)
> 
> diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
> index b315c734455a..02102e937b30 100644
> --- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
> +++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
> @@ -121,6 +121,7 @@ struct ice_aqc_list_caps_elem {
>   #define ICE_AQC_CAPS_PCIE_RESET_AVOIDANCE		0x0076
>   #define ICE_AQC_CAPS_POST_UPDATE_RESET_RESTRICT		0x0077
>   #define ICE_AQC_CAPS_NVM_MGMT				0x0080
> +#define ICE_AQC_CAPS_TX_SCHED_TOPO_COMP_MODE		0x0085
>   #define ICE_AQC_CAPS_FW_LAG_SUPPORT			0x0092
>   #define ICE_AQC_BIT_ROCEV2_LAG				0x01
>   #define ICE_AQC_BIT_SRIOV_LAG				0x02
> @@ -819,6 +820,23 @@ struct ice_aqc_get_topo {
>   	__le32 addr_low;
>   };
>   
> +/* Get/Set Tx Topology (indirect 0x0418/0x0417) */
> +struct ice_aqc_get_set_tx_topo {
> +	u8 set_flags;
> +#define ICE_AQC_TX_TOPO_FLAGS_CORRER		BIT(0)
> +#define ICE_AQC_TX_TOPO_FLAGS_SRC_RAM		BIT(1)
> +#define ICE_AQC_TX_TOPO_FLAGS_LOAD_NEW		BIT(4)
> +#define ICE_AQC_TX_TOPO_FLAGS_ISSUED		BIT(5)
> +
> +	u8 get_flags;
> +#define ICE_AQC_TX_TOPO_GET_RAM		2
> +
> +	__le16 reserved1;
> +	__le32 reserved2;
> +	__le32 addr_high;
> +	__le32 addr_low;
> +};
> +
>   /* Update TSE (indirect 0x0403)
>    * Get TSE (indirect 0x0404)
>    * Add TSE (indirect 0x0401)
> @@ -2547,6 +2565,7 @@ struct ice_aq_desc {
>   		struct ice_aqc_get_link_topo get_link_topo;
>   		struct ice_aqc_i2c read_write_i2c;
>   		struct ice_aqc_read_i2c_resp read_i2c_resp;
> +		struct ice_aqc_get_set_tx_topo get_set_tx_topo;
>   	} params;
>   };
>   
> @@ -2653,6 +2672,10 @@ enum ice_adminq_opc {
>   	ice_aqc_opc_query_sched_res			= 0x0412,
>   	ice_aqc_opc_remove_rl_profiles			= 0x0415,
>   
> +	/* tx topology commands */
> +	ice_aqc_opc_set_tx_topo				= 0x0417,
> +	ice_aqc_opc_get_tx_topo				= 0x0418,
> +
>   	/* PHY commands */
>   	ice_aqc_opc_get_phy_caps			= 0x0600,
>   	ice_aqc_opc_set_phy_cfg				= 0x0601,
> diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c
> index 090a2b8b5ff2..175091011251 100644
> --- a/drivers/net/ethernet/intel/ice/ice_common.c
> +++ b/drivers/net/ethernet/intel/ice/ice_common.c
> @@ -1622,6 +1622,8 @@ ice_aq_send_cmd(struct ice_hw *hw, struct ice_aq_desc *desc, void *buf,
>   	case ice_aqc_opc_set_port_params:
>   	case ice_aqc_opc_get_vlan_mode_parameters:
>   	case ice_aqc_opc_set_vlan_mode_parameters:
> +	case ice_aqc_opc_set_tx_topo:
> +	case ice_aqc_opc_get_tx_topo:
>   	case ice_aqc_opc_add_recipe:
>   	case ice_aqc_opc_recipe_to_profile:
>   	case ice_aqc_opc_get_recipe:
> @@ -2178,6 +2180,9 @@ ice_parse_common_caps(struct ice_hw *hw, struct ice_hw_common_caps *caps,
>   		ice_debug(hw, ICE_DBG_INIT, "%s: sriov_lag = %u\n",
>   			  prefix, caps->sriov_lag);
>   		break;
> +	case ICE_AQC_CAPS_TX_SCHED_TOPO_COMP_MODE:
> +		caps->tx_sched_topo_comp_mode_en = (number == 1);
> +		break;
>   	default:
>   		/* Not one of the recognized common capabilities */
>   		found = false;
> diff --git a/drivers/net/ethernet/intel/ice/ice_ddp.c b/drivers/net/ethernet/intel/ice/ice_ddp.c
> index 7532d11ad7f3..766437944774 100644
> --- a/drivers/net/ethernet/intel/ice/ice_ddp.c
> +++ b/drivers/net/ethernet/intel/ice/ice_ddp.c
> @@ -4,6 +4,7 @@
>   #include "ice_common.h"
>   #include "ice.h"
>   #include "ice_ddp.h"
> +#include "ice_sched.h"
>   
>   /* For supporting double VLAN mode, it is necessary to enable or disable certain
>    * boost tcam entries. The metadata labels names that match the following
> @@ -2263,3 +2264,201 @@ enum ice_ddp_state ice_copy_and_init_pkg(struct ice_hw *hw, const u8 *buf,
>   
>   	return state;
>   }
> +
> +/**
> + * ice_get_set_tx_topo - get or set Tx topology
> + * @hw: pointer to the HW struct
> + * @buf: pointer to Tx topology buffer
> + * @buf_size: buffer size
> + * @cd: pointer to command details structure or NULL
> + * @flags: pointer to descriptor flags
> + * @set: 0-get, 1-set topology
> + *
> + * The function will get or set Tx topology
> + */
> +static int
> +ice_get_set_tx_topo(struct ice_hw *hw, u8 *buf, u16 buf_size,
> +		    struct ice_sq_cd *cd, u8 *flags, bool set)
> +{
> +	struct ice_aqc_get_set_tx_topo *cmd;
> +	struct ice_aq_desc desc;
> +	int status;
> +
> +	cmd = &desc.params.get_set_tx_topo;
> +	if (set) {
> +		ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_set_tx_topo);
> +		cmd->set_flags = ICE_AQC_TX_TOPO_FLAGS_ISSUED;
> +		/* requested to update a new topology, not a default topology */
> +		if (buf)
> +			cmd->set_flags |= ICE_AQC_TX_TOPO_FLAGS_SRC_RAM |
> +					  ICE_AQC_TX_TOPO_FLAGS_LOAD_NEW;
> +	} else {
> +		ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_tx_topo);
> +		cmd->get_flags = ICE_AQC_TX_TOPO_GET_RAM;
> +	}
> +	desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD);
> +	status = ice_aq_send_cmd(hw, &desc, buf, buf_size, cd);
> +	if (status)
> +		return status;
> +	/* read the return flag values (first byte) for get operation */
> +	if (!set && flags)
> +		*flags = desc.params.get_set_tx_topo.set_flags;
> +
> +	return 0;
> +}
> +
> +/**
> + * ice_cfg_tx_topo - Initialize new Tx topology if available
> + * @hw: pointer to the HW struct
> + * @buf: pointer to Tx topology buffer
> + * @len: buffer size
> + *
> + * The function will apply the new Tx topology from the package buffer
> + * if available.
> + */
> +int ice_cfg_tx_topo(struct ice_hw *hw, u8 *buf, u32 len)
> +{
> +	u8 *current_topo, *new_topo = NULL;
> +	struct ice_run_time_cfg_seg *seg;
> +	struct ice_buf_hdr *section;
> +	struct ice_pkg_hdr *pkg_hdr;
> +	enum ice_ddp_state state;
> +	u16 offset, size = 0;
> +	u32 reg = 0;
> +	int status;
> +	u8 flags;
> +
> +	if (!buf || !len)
> +		return -EINVAL;
> +
> +	/* Does FW support new Tx topology mode ? */
> +	if (!hw->func_caps.common_cap.tx_sched_topo_comp_mode_en) {
> +		ice_debug(hw, ICE_DBG_INIT, "FW doesn't support compatibility mode\n");
> +		return -EOPNOTSUPP;
> +	}
> +
> +	current_topo = kzalloc(ICE_AQ_MAX_BUF_LEN, GFP_KERNEL);
> +	if (!current_topo)
> +		return -ENOMEM;
> +
> +	/* Get the current Tx topology */
> +	status = ice_get_set_tx_topo(hw, current_topo, ICE_AQ_MAX_BUF_LEN, NULL,
> +				     &flags, false);
> +
> +	kfree(current_topo);
> +
> +	if (status) {
> +		ice_debug(hw, ICE_DBG_INIT, "Get current topology is failed\n");
> +		return status;
> +	}
> +
> +	/* Is default topology already applied ? */
> +	if (!(flags & ICE_AQC_TX_TOPO_FLAGS_LOAD_NEW) &&
> +	    hw->num_tx_sched_layers == ICE_SCHED_9_LAYERS) {
> +		ice_debug(hw, ICE_DBG_INIT, "Default topology already applied\n");
> +		return -EEXIST;
> +	}
> +
> +	/* Is new topology already applied ? */
> +	if ((flags & ICE_AQC_TX_TOPO_FLAGS_LOAD_NEW) &&
> +	    hw->num_tx_sched_layers == ICE_SCHED_5_LAYERS) {
> +		ice_debug(hw, ICE_DBG_INIT, "New topology already applied\n");
> +		return -EEXIST;
> +	}
> +
> +	/* Setting topology already issued? */
> +	if (flags & ICE_AQC_TX_TOPO_FLAGS_ISSUED) {
> +		ice_debug(hw, ICE_DBG_INIT, "Update Tx topology was done by another PF\n");
> +		/* Add a small delay before exiting */
> +		msleep(2000);
> +		return -EEXIST;
> +	}
> +
> +	/* Change the topology from new to default (5 to 9) */
> +	if (!(flags & ICE_AQC_TX_TOPO_FLAGS_LOAD_NEW) &&
> +	    hw->num_tx_sched_layers == ICE_SCHED_5_LAYERS) {
> +		ice_debug(hw, ICE_DBG_INIT, "Change topology from 5 to 9 layers\n");
> +		goto update_topo;
> +	}
> +
> +	pkg_hdr = (struct ice_pkg_hdr *)buf;
> +	state = ice_verify_pkg(pkg_hdr, len);
> +	if (state) {
> +		ice_debug(hw, ICE_DBG_INIT, "Failed to verify pkg (err: %d)\n",
> +			  state);
> +		return -EIO;
> +	}
> +
> +	/* Find runtime configuration segment */
> +	seg = (struct ice_run_time_cfg_seg *)
> +	      ice_find_seg_in_pkg(hw, SEGMENT_TYPE_ICE_RUN_TIME_CFG, pkg_hdr);
> +	if (!seg) {
> +		ice_debug(hw, ICE_DBG_INIT, "5 layer topology segment is missing\n");
> +		return -EIO;
> +	}
> +
> +	if (le32_to_cpu(seg->buf_table.buf_count) < ICE_MIN_S_COUNT) {
> +		ice_debug(hw, ICE_DBG_INIT, "5 layer topology segment count(%d) is wrong\n",
> +			  seg->buf_table.buf_count);
> +		return -EIO;
> +	}
> +
> +	section = ice_pkg_val_buf(seg->buf_table.buf_array);
> +	if (!section || le32_to_cpu(section->section_entry[0].type) !=
> +		ICE_SID_TX_5_LAYER_TOPO) {
> +		ice_debug(hw, ICE_DBG_INIT, "5 layer topology section type is wrong\n");
> +		return -EIO;
> +	}
> +
> +	size = le16_to_cpu(section->section_entry[0].size);
> +	offset = le16_to_cpu(section->section_entry[0].offset);
> +	if (size < ICE_MIN_S_SZ || size > ICE_MAX_S_SZ) {
> +		ice_debug(hw, ICE_DBG_INIT, "5 layer topology section size is wrong\n");
> +		return -EIO;
> +	}
> +
> +	/* Make sure the section fits in the buffer */
> +	if (offset + size > ICE_PKG_BUF_SIZE) {
> +		ice_debug(hw, ICE_DBG_INIT, "5 layer topology buffer > 4K\n");
> +		return -EIO;
> +	}
> +
> +	/* Get the new topology buffer */
> +	new_topo = ((u8 *)section) + offset;
> +
> +update_topo:
> +	/* Acquire global lock to make sure that set topology issued
> +	 * by one PF.
> +	 */
> +	status = ice_acquire_res(hw, ICE_GLOBAL_CFG_LOCK_RES_ID, ICE_RES_WRITE,
> +				 ICE_GLOBAL_CFG_LOCK_TIMEOUT);
> +	if (status) {
> +		ice_debug(hw, ICE_DBG_INIT, "Failed to acquire global lock\n");
> +		return status;
> +	}
> +
> +	/* Check if reset was triggered already. */
> +	reg = rd32(hw, GLGEN_RSTAT);
> +	if (reg & GLGEN_RSTAT_DEVSTATE_M) {
> +		/* Reset is in progress, re-init the HW again */
> +		ice_debug(hw, ICE_DBG_INIT, "Reset is in progress. Layer topology might be applied already\n");
> +		ice_check_reset(hw);
> +		return 0;
> +	}
> +
> +	/* Set new topology */
> +	status = ice_get_set_tx_topo(hw, new_topo, size, NULL, NULL, true);
> +	if (status) {
> +		ice_debug(hw, ICE_DBG_INIT, "Failed setting Tx topology\n");
> +		return status;
> +	}
> +
> +	/* New topology is updated, delay 1 second before issuing the CORER */
> +	msleep(1000);
> +	ice_reset(hw, ICE_RESET_CORER);
> +	/* CORER will clear the global lock, so no explicit call
> +	 * required for release.
> +	 */
> +
> +	return 0;
> +}
> diff --git a/drivers/net/ethernet/intel/ice/ice_ddp.h b/drivers/net/ethernet/intel/ice/ice_ddp.h
> index ff66c2ffb1a2..622543f08b43 100644
> --- a/drivers/net/ethernet/intel/ice/ice_ddp.h
> +++ b/drivers/net/ethernet/intel/ice/ice_ddp.h
> @@ -454,4 +454,6 @@ u16 ice_pkg_buf_get_active_sections(struct ice_buf_build *bld);
>   void *ice_pkg_enum_section(struct ice_seg *ice_seg, struct ice_pkg_enum *state,
>   			   u32 sect_type);
>   
> +int ice_cfg_tx_topo(struct ice_hw *hw, u8 *buf, u32 len);
> +
>   #endif
> diff --git a/drivers/net/ethernet/intel/ice/ice_sched.h b/drivers/net/ethernet/intel/ice/ice_sched.h
> index 1aef05ea5a57..9baff6a857d8 100644
> --- a/drivers/net/ethernet/intel/ice/ice_sched.h
> +++ b/drivers/net/ethernet/intel/ice/ice_sched.h
> @@ -6,6 +6,9 @@
>   
>   #include "ice_common.h"
>   
> +#define ICE_SCHED_5_LAYERS	5
> +#define ICE_SCHED_9_LAYERS	9
> +
>   #define SCHED_NODE_NAME_MAX_LEN 32
>   
>   #define ICE_QGRP_LAYER_OFFSET	2
> diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h
> index 657f97e2105f..f964f26664d0 100644
> --- a/drivers/net/ethernet/intel/ice/ice_type.h
> +++ b/drivers/net/ethernet/intel/ice/ice_type.h
> @@ -296,6 +296,7 @@ struct ice_hw_common_caps {
>   	bool pcie_reset_avoidance;
>   	/* Post update reset restriction */
>   	bool reset_restrict_support;
> +	bool tx_sched_topo_comp_mode_en;
>   };
>   
>   /* IEEE 1588 TIME_SYNC specific info */

This is of course v4, not v1; sorry for the mistake in the tag.
Mateusz

  reply	other threads:[~2024-02-19 10:16 UTC|newest]

Thread overview: 46+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-02-19 10:05 [Intel-wired-lan] [PATCH iwl-next v4 0/5] ice: Support 5 layer Tx scheduler topology Mateusz Polchlopek
2024-02-19 10:05 ` Mateusz Polchlopek
2024-02-19 10:05 ` [Intel-wired-lan] [PATCH iwl-next v1 1/5] ice: Support 5 layer topology Mateusz Polchlopek
2024-02-19 10:05   ` Mateusz Polchlopek
2024-02-19 10:16   ` Mateusz Polchlopek [this message]
2024-02-19 10:16     ` Mateusz Polchlopek
2024-02-19 10:05 ` [Intel-wired-lan] [PATCH iwl-next v4 2/5] ice: Adjust the VSI/Aggregator layers Mateusz Polchlopek
2024-02-19 10:05   ` Mateusz Polchlopek
2024-02-19 10:05 ` [Intel-wired-lan] [PATCH iwl-next v4 3/5] ice: Enable switching default Tx scheduler topology Mateusz Polchlopek
2024-02-19 10:05   ` Mateusz Polchlopek
2024-02-19 10:05 ` [Intel-wired-lan] [PATCH iwl-next v4 4/5] ice: Add tx_scheduling_layers devlink param Mateusz Polchlopek
2024-02-19 10:05   ` Mateusz Polchlopek
2024-02-19 12:37   ` Jiri Pirko
2024-02-19 12:37     ` Jiri Pirko
2024-02-19 13:33     ` Przemek Kitszel
2024-02-19 13:33       ` Przemek Kitszel
2024-02-19 17:15       ` Jiri Pirko
2024-02-19 17:15         ` Jiri Pirko
2024-02-21 23:38     ` Jakub Kicinski
2024-02-21 23:38       ` Jakub Kicinski
2024-02-22 13:25       ` Mateusz Polchlopek
2024-02-22 13:25         ` Mateusz Polchlopek
2024-02-22 23:07         ` Jakub Kicinski
2024-02-22 23:07           ` Jakub Kicinski
2024-02-23  9:45           ` Jiri Pirko
2024-02-23  9:45             ` Jiri Pirko
2024-02-23 14:27             ` Jakub Kicinski
2024-02-23 14:27               ` Jakub Kicinski
2024-02-25  7:18               ` Jiri Pirko
2024-02-25  7:18                 ` Jiri Pirko
2024-02-27  2:37                 ` Jakub Kicinski
2024-02-27  2:37                   ` Jakub Kicinski
2024-02-27 12:17                   ` Jiri Pirko
2024-02-27 12:17                     ` Jiri Pirko
2024-02-27 13:05                     ` Przemek Kitszel
2024-02-27 13:05                       ` Przemek Kitszel
2024-02-27 15:39                       ` Jiri Pirko
2024-02-27 15:39                         ` Jiri Pirko
2024-02-27 15:41                       ` Andrew Lunn
2024-02-27 15:41                         ` Andrew Lunn
2024-02-27 16:04                         ` Jiri Pirko
2024-02-27 16:04                           ` Jiri Pirko
2024-02-27 20:38                           ` Andrew Lunn
2024-02-27 20:38                             ` Andrew Lunn
2024-02-19 10:05 ` [Intel-wired-lan] [PATCH iwl-next v4 5/5] ice: Document tx_scheduling_layers parameter Mateusz Polchlopek
2024-02-19 10:05   ` Mateusz Polchlopek

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=d5ccaa1e-459e-4ac4-8c70-cfec02d62d11@intel.com \
    --to=mateusz.polchlopek@intel.com \
    --cc=horms@kernel.org \
    --cc=intel-wired-lan@lists.osuosl.org \
    --cc=michal.wilczynski@intel.com \
    --cc=netdev@vger.kernel.org \
    --cc=przemyslaw.kitszel@intel.com \
    --cc=victor.raj@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.