Linux-RDMA Archive on lore.kernel.org
 help / color / Atom feed
* [PATCH] net/mlx4: Treat VFs fair when handling comm_channel_events
@ 2021-04-15 11:43 Hans Westgaard Ry
  2021-04-15 14:02 ` Hans Westgaard Ry
                   ` (3 more replies)
  0 siblings, 4 replies; 5+ messages in thread
From: Hans Westgaard Ry @ 2021-04-15 11:43 UTC (permalink / raw)
  To: linux-rdma; +Cc: Jack Morgenstein

Handling comm_channel_event in mlx4_master_comm_channel uses a double
loop to determine which slaves have requested work. The search is
always started at lowest slave. This leads to unfairness; lower VFs
tends to be prioritized over higher VFs.

The patch uses find_next_bit to determine which slaves to handle.
Fairness is implemented by always starting at the next to the last
start.

An MPI program has been used to measure improvements. It runs 500
ibv_reg_mr, synchronizes with all other instances and then runs 500
ibv_dereg_mr.

The results running 500 processes, time reported is for running 500
calls:

ibv_reg_mr:
             Mod.   Org.
mlx4_1    403.356ms 424.674ms
mlx4_2    403.355ms 424.674ms
mlx4_3    403.354ms 424.674ms
mlx4_4    403.355ms 424.674ms
mlx4_5    403.357ms 424.677ms
mlx4_6    403.354ms 424.676ms
mlx4_7    403.357ms 424.675ms
mlx4_8    403.355ms 424.675ms

ibv_dereg_mr:
             Mod.   Org.
mlx4_1    116.408ms 142.818ms
mlx4_2    116.434ms 142.793ms
mlx4_3    116.488ms 143.247ms
mlx4_4    116.679ms 143.230ms
mlx4_5    112.017ms 107.204ms
mlx4_6    112.032ms 107.516ms
mlx4_7    112.083ms 184.195ms
mlx4_8    115.089ms 190.618ms

Suggested-by: Håkon Bugge <haakon.bugge@oracle.com>
Signed-off-by: Hans Westgaard Ry <hans.westgaard.ry@oracle.com>
---
 drivers/net/ethernet/mellanox/mlx4/cmd.c  | 75 +++++++++++++++++--------------
 drivers/net/ethernet/mellanox/mlx4/mlx4.h |  1 +
 2 files changed, 43 insertions(+), 33 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c
index c678344d22a2..24989e96ab9d 100644
--- a/drivers/net/ethernet/mellanox/mlx4/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c
@@ -51,7 +51,6 @@
 #include "fw.h"
 #include "fw_qos.h"
 #include "mlx4_stats.h"
-
 #define CMD_POLL_TOKEN 0xffff
 #define INBOX_MASK	0xffffffffffffff00ULL
 
@@ -2241,48 +2240,58 @@ void mlx4_master_comm_channel(struct work_struct *work)
 	struct mlx4_priv *priv =
 		container_of(mfunc, struct mlx4_priv, mfunc);
 	struct mlx4_dev *dev = &priv->dev;
-	__be32 *bit_vec;
 	u32 comm_cmd;
-	u32 vec;
-	int i, j, slave;
+	int i, slave;
 	int toggle;
 	int served = 0;
 	int reported = 0;
 	u32 slt;
-
-	bit_vec = master->comm_arm_bit_vector;
-	for (i = 0; i < COMM_CHANNEL_BIT_ARRAY_SIZE; i++) {
-		vec = be32_to_cpu(bit_vec[i]);
-		for (j = 0; j < 32; j++) {
-			if (!(vec & (1 << j)))
-				continue;
-			++reported;
-			slave = (i * 32) + j;
-			comm_cmd = swab32(readl(
-					  &mfunc->comm[slave].slave_write));
-			slt = swab32(readl(&mfunc->comm[slave].slave_read))
-				     >> 31;
-			toggle = comm_cmd >> 31;
-			if (toggle != slt) {
-				if (master->slave_state[slave].comm_toggle
-				    != slt) {
-					pr_info("slave %d out of sync. read toggle %d, state toggle %d. Resynching.\n",
-						slave, slt,
-						master->slave_state[slave].comm_toggle);
-					master->slave_state[slave].comm_toggle =
-						slt;
-				}
-				mlx4_master_do_cmd(dev, slave,
-						   comm_cmd >> 16 & 0xff,
-						   comm_cmd & 0xffff, toggle);
-				++served;
+	u32 lbit_vec[COMM_CHANNEL_BIT_ARRAY_SIZE];
+	u32 nmbr_bits;
+	u32 prev_slave;
+	bool first = true;
+
+	for (i = 0; i < COMM_CHANNEL_BIT_ARRAY_SIZE; i++)
+		lbit_vec[i] = be32_to_cpu(master->comm_arm_bit_vector[i]);
+	nmbr_bits = dev->persist->num_vfs + 1;
+	if (++priv->next_slave >= nmbr_bits)
+		priv->next_slave = 0;
+	slave = priv->next_slave;
+	while (true) {
+		slave = find_next_bit((const unsigned long *)&lbit_vec, nmbr_bits, slave);
+		if  (!first && slave >= priv->next_slave) {
+			break;
+		} else if (slave == nmbr_bits) {
+			if (!first)
+				break;
+			first = false;
+			slave = 0;
+			continue;
+		}
+		++reported;
+		comm_cmd = swab32(readl(&mfunc->comm[slave].slave_write));
+		slt = swab32(readl(&mfunc->comm[slave].slave_read)) >> 31;
+		toggle = comm_cmd >> 31;
+		if (toggle != slt) {
+			if (master->slave_state[slave].comm_toggle
+			    != slt) {
+				pr_info("slave %d out of sync. read toggle %d, state toggle %d. Resynching.\n",
+					slave, slt,
+					master->slave_state[slave].comm_toggle);
+				master->slave_state[slave].comm_toggle =
+					slt;
 			}
+			mlx4_master_do_cmd(dev, slave,
+					   comm_cmd >> 16 & 0xff,
+					   comm_cmd & 0xffff, toggle);
+			++served;
 		}
+		prev_slave = slave++;
 	}
 
 	if (reported && reported != served)
-		mlx4_warn(dev, "Got command event with bitmask from %d slaves but %d were served\n",
-			  reported, served);
+		mlx4_warn(dev, "Got command event with bitmask from %d slaves but %d were served %x %d\n",
+			  reported, served, lbit_vec[0], priv->next_slave);
 
 	if (mlx4_ARM_COMM_CHANNEL(dev))
 		mlx4_warn(dev, "Failed to arm comm channel events\n");
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
index 64bed7ac3836..cd6ba80f4c90 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
@@ -924,6 +924,7 @@ struct mlx4_priv {
 
 	atomic_t		opreq_count;
 	struct work_struct	opreq_task;
+	u32			next_slave; /* mlx4_master_comm_channel */
 };
 
 static inline struct mlx4_priv *mlx4_priv(struct mlx4_dev *dev)
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] net/mlx4: Treat VFs fair when handling comm_channel_events
  2021-04-15 11:43 [PATCH] net/mlx4: Treat VFs fair when handling comm_channel_events Hans Westgaard Ry
@ 2021-04-15 14:02 ` Hans Westgaard Ry
  2021-04-15 14:22 ` kernel test robot
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 5+ messages in thread
From: Hans Westgaard Ry @ 2021-04-15 14:02 UTC (permalink / raw)
  To: linux-rdma, Jason Gunthorpe, Doug Ledford; +Cc: Jack Morgenstein


On 4/15/21 1:43 PM, Hans Westgaard Ry wrote:
> Handling comm_channel_event in mlx4_master_comm_channel uses a double
> loop to determine which slaves have requested work. The search is
> always started at lowest slave. This leads to unfairness; lower VFs
> tends to be prioritized over higher VFs.
>
> The patch uses find_next_bit to determine which slaves to handle.
> Fairness is implemented by always starting at the next to the last
> start.
>
> An MPI program has been used to measure improvements. It runs 500
> ibv_reg_mr, synchronizes with all other instances and then runs 500
> ibv_dereg_mr.
>
> The results running 500 processes, time reported is for running 500
> calls:
>
> ibv_reg_mr:
>               Mod.   Org.
> mlx4_1    403.356ms 424.674ms
> mlx4_2    403.355ms 424.674ms
> mlx4_3    403.354ms 424.674ms
> mlx4_4    403.355ms 424.674ms
> mlx4_5    403.357ms 424.677ms
> mlx4_6    403.354ms 424.676ms
> mlx4_7    403.357ms 424.675ms
> mlx4_8    403.355ms 424.675ms
>
> ibv_dereg_mr:
>               Mod.   Org.
> mlx4_1    116.408ms 142.818ms
> mlx4_2    116.434ms 142.793ms
> mlx4_3    116.488ms 143.247ms
> mlx4_4    116.679ms 143.230ms
> mlx4_5    112.017ms 107.204ms
> mlx4_6    112.032ms 107.516ms
> mlx4_7    112.083ms 184.195ms
> mlx4_8    115.089ms 190.618ms
>
> Suggested-by: Håkon Bugge <haakon.bugge@oracle.com>
> Signed-off-by: Hans Westgaard Ry <hans.westgaard.ry@oracle.com>
> ---
>   drivers/net/ethernet/mellanox/mlx4/cmd.c  | 75 +++++++++++++++++--------------
>   drivers/net/ethernet/mellanox/mlx4/mlx4.h |  1 +
>   2 files changed, 43 insertions(+), 33 deletions(-)
>
> diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c
> index c678344d22a2..24989e96ab9d 100644
> --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c
> +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c
> @@ -51,7 +51,6 @@
>   #include "fw.h"
>   #include "fw_qos.h"
>   #include "mlx4_stats.h"
> -
>   #define CMD_POLL_TOKEN 0xffff
>   #define INBOX_MASK	0xffffffffffffff00ULL
>   
> @@ -2241,48 +2240,58 @@ void mlx4_master_comm_channel(struct work_struct *work)
>   	struct mlx4_priv *priv =
>   		container_of(mfunc, struct mlx4_priv, mfunc);
>   	struct mlx4_dev *dev = &priv->dev;
> -	__be32 *bit_vec;
>   	u32 comm_cmd;
> -	u32 vec;
> -	int i, j, slave;
> +	int i, slave;
>   	int toggle;
>   	int served = 0;
>   	int reported = 0;
>   	u32 slt;
> -
> -	bit_vec = master->comm_arm_bit_vector;
> -	for (i = 0; i < COMM_CHANNEL_BIT_ARRAY_SIZE; i++) {
> -		vec = be32_to_cpu(bit_vec[i]);
> -		for (j = 0; j < 32; j++) {
> -			if (!(vec & (1 << j)))
> -				continue;
> -			++reported;
> -			slave = (i * 32) + j;
> -			comm_cmd = swab32(readl(
> -					  &mfunc->comm[slave].slave_write));
> -			slt = swab32(readl(&mfunc->comm[slave].slave_read))
> -				     >> 31;
> -			toggle = comm_cmd >> 31;
> -			if (toggle != slt) {
> -				if (master->slave_state[slave].comm_toggle
> -				    != slt) {
> -					pr_info("slave %d out of sync. read toggle %d, state toggle %d. Resynching.\n",
> -						slave, slt,
> -						master->slave_state[slave].comm_toggle);
> -					master->slave_state[slave].comm_toggle =
> -						slt;
> -				}
> -				mlx4_master_do_cmd(dev, slave,
> -						   comm_cmd >> 16 & 0xff,
> -						   comm_cmd & 0xffff, toggle);
> -				++served;
> +	u32 lbit_vec[COMM_CHANNEL_BIT_ARRAY_SIZE];
> +	u32 nmbr_bits;
> +	u32 prev_slave;
> +	bool first = true;
> +
> +	for (i = 0; i < COMM_CHANNEL_BIT_ARRAY_SIZE; i++)
> +		lbit_vec[i] = be32_to_cpu(master->comm_arm_bit_vector[i]);
> +	nmbr_bits = dev->persist->num_vfs + 1;
> +	if (++priv->next_slave >= nmbr_bits)
> +		priv->next_slave = 0;
> +	slave = priv->next_slave;
> +	while (true) {
> +		slave = find_next_bit((const unsigned long *)&lbit_vec, nmbr_bits, slave);
> +		if  (!first && slave >= priv->next_slave) {
> +			break;
> +		} else if (slave == nmbr_bits) {
> +			if (!first)
> +				break;
> +			first = false;
> +			slave = 0;
> +			continue;
> +		}
> +		++reported;
> +		comm_cmd = swab32(readl(&mfunc->comm[slave].slave_write));
> +		slt = swab32(readl(&mfunc->comm[slave].slave_read)) >> 31;
> +		toggle = comm_cmd >> 31;
> +		if (toggle != slt) {
> +			if (master->slave_state[slave].comm_toggle
> +			    != slt) {
> +				pr_info("slave %d out of sync. read toggle %d, state toggle %d. Resynching.\n",
> +					slave, slt,
> +					master->slave_state[slave].comm_toggle);
> +				master->slave_state[slave].comm_toggle =
> +					slt;
>   			}
> +			mlx4_master_do_cmd(dev, slave,
> +					   comm_cmd >> 16 & 0xff,
> +					   comm_cmd & 0xffff, toggle);
> +			++served;
>   		}
> +		prev_slave = slave++;
>   	}
>   
>   	if (reported && reported != served)
> -		mlx4_warn(dev, "Got command event with bitmask from %d slaves but %d were served\n",
> -			  reported, served);
> +		mlx4_warn(dev, "Got command event with bitmask from %d slaves but %d were served %x %d\n",
> +			  reported, served, lbit_vec[0], priv->next_slave);
>   
>   	if (mlx4_ARM_COMM_CHANNEL(dev))
>   		mlx4_warn(dev, "Failed to arm comm channel events\n");
> diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
> index 64bed7ac3836..cd6ba80f4c90 100644
> --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h
> +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
> @@ -924,6 +924,7 @@ struct mlx4_priv {
>   
>   	atomic_t		opreq_count;
>   	struct work_struct	opreq_task;
> +	u32			next_slave; /* mlx4_master_comm_channel */
>   };
>   
>   static inline struct mlx4_priv *mlx4_priv(struct mlx4_dev *dev)

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] net/mlx4: Treat VFs fair when handling comm_channel_events
  2021-04-15 11:43 [PATCH] net/mlx4: Treat VFs fair when handling comm_channel_events Hans Westgaard Ry
  2021-04-15 14:02 ` Hans Westgaard Ry
@ 2021-04-15 14:22 ` kernel test robot
  2021-04-18 11:30 ` Leon Romanovsky
  2021-04-18 13:17 ` Tariq Toukan
  3 siblings, 0 replies; 5+ messages in thread
From: kernel test robot @ 2021-04-15 14:22 UTC (permalink / raw)
  To: Hans Westgaard Ry, linux-rdma; +Cc: kbuild-all, Jack Morgenstein


[-- Attachment #1: Type: text/plain, Size: 4392 bytes --]

Hi Hans,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on rdma/for-next]
[also build test WARNING on linus/master v5.12-rc7 next-20210414]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Hans-Westgaard-Ry/net-mlx4-Treat-VFs-fair-when-handling-comm_channel_events/20210415-194619
base:   https://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma.git for-next
config: arc-allyesconfig (attached as .config)
compiler: arceb-elf-gcc (GCC) 9.3.0
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/0day-ci/linux/commit/9de686423c117ccefb2c09e57ab16f84f0434c68
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Hans-Westgaard-Ry/net-mlx4-Treat-VFs-fair-when-handling-comm_channel_events/20210415-194619
        git checkout 9de686423c117ccefb2c09e57ab16f84f0434c68
        # save the attached .config to linux build tree
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross W=1 ARCH=arc 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All warnings (new ones prefixed by >>):

   drivers/net/ethernet/mellanox/mlx4/cmd.c: In function 'mlx4_master_comm_channel':
>> drivers/net/ethernet/mellanox/mlx4/cmd.c:2251:6: warning: variable 'prev_slave' set but not used [-Wunused-but-set-variable]
    2251 |  u32 prev_slave;
         |      ^~~~~~~~~~


vim +/prev_slave +2251 drivers/net/ethernet/mellanox/mlx4/cmd.c

  2230	
  2231	/* master command processing */
  2232	void mlx4_master_comm_channel(struct work_struct *work)
  2233	{
  2234		struct mlx4_mfunc_master_ctx *master =
  2235			container_of(work,
  2236				     struct mlx4_mfunc_master_ctx,
  2237				     comm_work);
  2238		struct mlx4_mfunc *mfunc =
  2239			container_of(master, struct mlx4_mfunc, master);
  2240		struct mlx4_priv *priv =
  2241			container_of(mfunc, struct mlx4_priv, mfunc);
  2242		struct mlx4_dev *dev = &priv->dev;
  2243		u32 comm_cmd;
  2244		int i, slave;
  2245		int toggle;
  2246		int served = 0;
  2247		int reported = 0;
  2248		u32 slt;
  2249		u32 lbit_vec[COMM_CHANNEL_BIT_ARRAY_SIZE];
  2250		u32 nmbr_bits;
> 2251		u32 prev_slave;
  2252		bool first = true;
  2253	
  2254		for (i = 0; i < COMM_CHANNEL_BIT_ARRAY_SIZE; i++)
  2255			lbit_vec[i] = be32_to_cpu(master->comm_arm_bit_vector[i]);
  2256		nmbr_bits = dev->persist->num_vfs + 1;
  2257		if (++priv->next_slave >= nmbr_bits)
  2258			priv->next_slave = 0;
  2259		slave = priv->next_slave;
  2260		while (true) {
  2261			slave = find_next_bit((const unsigned long *)&lbit_vec, nmbr_bits, slave);
  2262			if  (!first && slave >= priv->next_slave) {
  2263				break;
  2264			} else if (slave == nmbr_bits) {
  2265				if (!first)
  2266					break;
  2267				first = false;
  2268				slave = 0;
  2269				continue;
  2270			}
  2271			++reported;
  2272			comm_cmd = swab32(readl(&mfunc->comm[slave].slave_write));
  2273			slt = swab32(readl(&mfunc->comm[slave].slave_read)) >> 31;
  2274			toggle = comm_cmd >> 31;
  2275			if (toggle != slt) {
  2276				if (master->slave_state[slave].comm_toggle
  2277				    != slt) {
  2278					pr_info("slave %d out of sync. read toggle %d, state toggle %d. Resynching.\n",
  2279						slave, slt,
  2280						master->slave_state[slave].comm_toggle);
  2281					master->slave_state[slave].comm_toggle =
  2282						slt;
  2283				}
  2284				mlx4_master_do_cmd(dev, slave,
  2285						   comm_cmd >> 16 & 0xff,
  2286						   comm_cmd & 0xffff, toggle);
  2287				++served;
  2288			}
  2289			prev_slave = slave++;
  2290		}
  2291	
  2292		if (reported && reported != served)
  2293			mlx4_warn(dev, "Got command event with bitmask from %d slaves but %d were served %x %d\n",
  2294				  reported, served, lbit_vec[0], priv->next_slave);
  2295	
  2296		if (mlx4_ARM_COMM_CHANNEL(dev))
  2297			mlx4_warn(dev, "Failed to arm comm channel events\n");
  2298	}
  2299	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 67446 bytes --]

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] net/mlx4: Treat VFs fair when handling comm_channel_events
  2021-04-15 11:43 [PATCH] net/mlx4: Treat VFs fair when handling comm_channel_events Hans Westgaard Ry
  2021-04-15 14:02 ` Hans Westgaard Ry
  2021-04-15 14:22 ` kernel test robot
@ 2021-04-18 11:30 ` Leon Romanovsky
  2021-04-18 13:17 ` Tariq Toukan
  3 siblings, 0 replies; 5+ messages in thread
From: Leon Romanovsky @ 2021-04-18 11:30 UTC (permalink / raw)
  To: Hans Westgaard Ry; +Cc: linux-rdma, Jack Morgenstein, Tariq Toukan

On Thu, Apr 15, 2021 at 01:43:42PM +0200, Hans Westgaard Ry wrote:
> Handling comm_channel_event in mlx4_master_comm_channel uses a double
> loop to determine which slaves have requested work. The search is
> always started at lowest slave. This leads to unfairness; lower VFs
> tends to be prioritized over higher VFs.
> 
> The patch uses find_next_bit to determine which slaves to handle.
> Fairness is implemented by always starting at the next to the last
> start.
> 
> An MPI program has been used to measure improvements. It runs 500
> ibv_reg_mr, synchronizes with all other instances and then runs 500
> ibv_dereg_mr.
> 
> The results running 500 processes, time reported is for running 500
> calls:
> 
> ibv_reg_mr:
>              Mod.   Org.
> mlx4_1    403.356ms 424.674ms
> mlx4_2    403.355ms 424.674ms
> mlx4_3    403.354ms 424.674ms
> mlx4_4    403.355ms 424.674ms
> mlx4_5    403.357ms 424.677ms
> mlx4_6    403.354ms 424.676ms
> mlx4_7    403.357ms 424.675ms
> mlx4_8    403.355ms 424.675ms
> 
> ibv_dereg_mr:
>              Mod.   Org.
> mlx4_1    116.408ms 142.818ms
> mlx4_2    116.434ms 142.793ms
> mlx4_3    116.488ms 143.247ms
> mlx4_4    116.679ms 143.230ms
> mlx4_5    112.017ms 107.204ms
> mlx4_6    112.032ms 107.516ms
> mlx4_7    112.083ms 184.195ms
> mlx4_8    115.089ms 190.618ms
> 
> Suggested-by: Håkon Bugge <haakon.bugge@oracle.com>
> Signed-off-by: Hans Westgaard Ry <hans.westgaard.ry@oracle.com>
> ---
>  drivers/net/ethernet/mellanox/mlx4/cmd.c  | 75 +++++++++++++++++--------------
>  drivers/net/ethernet/mellanox/mlx4/mlx4.h |  1 +
>  2 files changed, 43 insertions(+), 33 deletions(-)

Please fix kbuild error and resubmit to the netdev ML with Tariq CCed.

Thanks

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] net/mlx4: Treat VFs fair when handling comm_channel_events
  2021-04-15 11:43 [PATCH] net/mlx4: Treat VFs fair when handling comm_channel_events Hans Westgaard Ry
                   ` (2 preceding siblings ...)
  2021-04-18 11:30 ` Leon Romanovsky
@ 2021-04-18 13:17 ` Tariq Toukan
  3 siblings, 0 replies; 5+ messages in thread
From: Tariq Toukan @ 2021-04-18 13:17 UTC (permalink / raw)
  To: Hans Westgaard Ry, linux-rdma; +Cc: Jack Morgenstein



On 4/15/2021 2:43 PM, Hans Westgaard Ry wrote:
> Handling comm_channel_event in mlx4_master_comm_channel uses a double
> loop to determine which slaves have requested work. The search is
> always started at lowest slave. This leads to unfairness; lower VFs
> tends to be prioritized over higher VFs.
> 
> The patch uses find_next_bit to determine which slaves to handle.
> Fairness is implemented by always starting at the next to the last
> start.
> 
> An MPI program has been used to measure improvements. It runs 500
> ibv_reg_mr, synchronizes with all other instances and then runs 500
> ibv_dereg_mr.
> 
> The results running 500 processes, time reported is for running 500
> calls:
> 
> ibv_reg_mr:
>               Mod.   Org.
> mlx4_1    403.356ms 424.674ms
> mlx4_2    403.355ms 424.674ms
> mlx4_3    403.354ms 424.674ms
> mlx4_4    403.355ms 424.674ms
> mlx4_5    403.357ms 424.677ms
> mlx4_6    403.354ms 424.676ms
> mlx4_7    403.357ms 424.675ms
> mlx4_8    403.355ms 424.675ms
> 
> ibv_dereg_mr:
>               Mod.   Org.
> mlx4_1    116.408ms 142.818ms
> mlx4_2    116.434ms 142.793ms
> mlx4_3    116.488ms 143.247ms
> mlx4_4    116.679ms 143.230ms
> mlx4_5    112.017ms 107.204ms
> mlx4_6    112.032ms 107.516ms
> mlx4_7    112.083ms 184.195ms
> mlx4_8    115.089ms 190.618ms
> 
> Suggested-by: Håkon Bugge <haakon.bugge@oracle.com>
> Signed-off-by: Hans Westgaard Ry <hans.westgaard.ry@oracle.com>
> ---
>   drivers/net/ethernet/mellanox/mlx4/cmd.c  | 75 +++++++++++++++++--------------
>   drivers/net/ethernet/mellanox/mlx4/mlx4.h |  1 +
>   2 files changed, 43 insertions(+), 33 deletions(-)
> 
> diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c
> index c678344d22a2..24989e96ab9d 100644
> --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c
> +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c
> @@ -51,7 +51,6 @@
>   #include "fw.h"
>   #include "fw_qos.h"
>   #include "mlx4_stats.h"
> -
>   #define CMD_POLL_TOKEN 0xffff
>   #define INBOX_MASK	0xffffffffffffff00ULL
>   

Unrelated change. Please revert it.


> @@ -2241,48 +2240,58 @@ void mlx4_master_comm_channel(struct work_struct *work)
>   	struct mlx4_priv *priv =
>   		container_of(mfunc, struct mlx4_priv, mfunc);
>   	struct mlx4_dev *dev = &priv->dev;
> -	__be32 *bit_vec;
>   	u32 comm_cmd;
> -	u32 vec;
> -	int i, j, slave;
> +	int i, slave;
>   	int toggle;
>   	int served = 0;
>   	int reported = 0;
>   	u32 slt;
> -
> -	bit_vec = master->comm_arm_bit_vector;
> -	for (i = 0; i < COMM_CHANNEL_BIT_ARRAY_SIZE; i++) {
> -		vec = be32_to_cpu(bit_vec[i]);
> -		for (j = 0; j < 32; j++) {
> -			if (!(vec & (1 << j)))
> -				continue;
> -			++reported;
> -			slave = (i * 32) + j;
> -			comm_cmd = swab32(readl(
> -					  &mfunc->comm[slave].slave_write));
> -			slt = swab32(readl(&mfunc->comm[slave].slave_read))
> -				     >> 31;
> -			toggle = comm_cmd >> 31;
> -			if (toggle != slt) {
> -				if (master->slave_state[slave].comm_toggle
> -				    != slt) {
> -					pr_info("slave %d out of sync. read toggle %d, state toggle %d. Resynching.\n",
> -						slave, slt,
> -						master->slave_state[slave].comm_toggle);
> -					master->slave_state[slave].comm_toggle =
> -						slt;
> -				}
> -				mlx4_master_do_cmd(dev, slave,
> -						   comm_cmd >> 16 & 0xff,
> -						   comm_cmd & 0xffff, toggle);
> -				++served;
> +	u32 lbit_vec[COMM_CHANNEL_BIT_ARRAY_SIZE];
> +	u32 nmbr_bits;
> +	u32 prev_slave;
> +	bool first = true;

Please maintain reversed Christmas tree when possible.
Split declaration from init if needed.

> +
> +	for (i = 0; i < COMM_CHANNEL_BIT_ARRAY_SIZE; i++)
> +		lbit_vec[i] = be32_to_cpu(master->comm_arm_bit_vector[i]);
> +	nmbr_bits = dev->persist->num_vfs + 1;
> +	if (++priv->next_slave >= nmbr_bits)
> +		priv->next_slave = 0;
> +	slave = priv->next_slave;
> +	while (true) {
> +		slave = find_next_bit((const unsigned long *)&lbit_vec, nmbr_bits, slave);
> +		if  (!first && slave >= priv->next_slave) {
> +			break;
> +		} else if (slave == nmbr_bits) {

Unnecessary else, as if block breaks.

> +			if (!first)
> +				break;
> +			first = false;
> +			slave = 0;
> +			continue;
> +		}
> +		++reported;
> +		comm_cmd = swab32(readl(&mfunc->comm[slave].slave_write));
> +		slt = swab32(readl(&mfunc->comm[slave].slave_read)) >> 31;
> +		toggle = comm_cmd >> 31;
> +		if (toggle != slt) {
> +			if (master->slave_state[slave].comm_toggle
> +			    != slt) {
> +				pr_info("slave %d out of sync. read toggle %d, state toggle %d. Resynching.\n",
> +					slave, slt,
> +					master->slave_state[slave].comm_toggle);
> +				master->slave_state[slave].comm_toggle =
> +					slt;
>   			}
> +			mlx4_master_do_cmd(dev, slave,
> +					   comm_cmd >> 16 & 0xff,
> +					   comm_cmd & 0xffff, toggle);
> +			++served;
>   		}
> +		prev_slave = slave++;
>   	}
>   
>   	if (reported && reported != served)
> -		mlx4_warn(dev, "Got command event with bitmask from %d slaves but %d were served\n",
> -			  reported, served);
> +		mlx4_warn(dev, "Got command event with bitmask from %d slaves but %d were served %x %d\n",

Not clear from the string what these newly printed values are.
Improve the string/description.

> +			  reported, served, lbit_vec[0], priv->next_slave);
>   
>   	if (mlx4_ARM_COMM_CHANNEL(dev))
>   		mlx4_warn(dev, "Failed to arm comm channel events\n");
> diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
> index 64bed7ac3836..cd6ba80f4c90 100644
> --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h
> +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
> @@ -924,6 +924,7 @@ struct mlx4_priv {
>   
>   	atomic_t		opreq_count;
>   	struct work_struct	opreq_task;
> +	u32			next_slave; /* mlx4_master_comm_channel */

Move it to a more specific struct, consider struct mlx4_mfunc_master_ctx.

>   };
>   
>   static inline struct mlx4_priv *mlx4_priv(struct mlx4_dev *dev)
> 

Please submit to netdev mailing list, and CC needed maintainers.

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, back to index

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-04-15 11:43 [PATCH] net/mlx4: Treat VFs fair when handling comm_channel_events Hans Westgaard Ry
2021-04-15 14:02 ` Hans Westgaard Ry
2021-04-15 14:22 ` kernel test robot
2021-04-18 11:30 ` Leon Romanovsky
2021-04-18 13:17 ` Tariq Toukan

Linux-RDMA Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/linux-rdma/0 linux-rdma/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 linux-rdma linux-rdma/ https://lore.kernel.org/linux-rdma \
		linux-rdma@vger.kernel.org
	public-inbox-index linux-rdma

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.linux-rdma


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git