* Re: [PATCH] net/mlx4: Treat VFs fair when handling comm_channel_events
2021-04-15 11:43 [PATCH] net/mlx4: Treat VFs fair when handling comm_channel_events Hans Westgaard Ry
@ 2021-04-15 14:02 ` Hans Westgaard Ry
2021-04-15 14:22 ` kernel test robot
` (2 subsequent siblings)
3 siblings, 0 replies; 5+ messages in thread
From: Hans Westgaard Ry @ 2021-04-15 14:02 UTC (permalink / raw)
To: linux-rdma, Jason Gunthorpe, Doug Ledford; +Cc: Jack Morgenstein
On 4/15/21 1:43 PM, Hans Westgaard Ry wrote:
> Handling comm_channel_event in mlx4_master_comm_channel uses a double
> loop to determine which slaves have requested work. The search is
> always started at lowest slave. This leads to unfairness; lower VFs
> tends to be prioritized over higher VFs.
>
> The patch uses find_next_bit to determine which slaves to handle.
> Fairness is implemented by always starting at the next to the last
> start.
>
> An MPI program has been used to measure improvements. It runs 500
> ibv_reg_mr, synchronizes with all other instances and then runs 500
> ibv_dereg_mr.
>
> The results running 500 processes, time reported is for running 500
> calls:
>
> ibv_reg_mr:
> Mod. Org.
> mlx4_1 403.356ms 424.674ms
> mlx4_2 403.355ms 424.674ms
> mlx4_3 403.354ms 424.674ms
> mlx4_4 403.355ms 424.674ms
> mlx4_5 403.357ms 424.677ms
> mlx4_6 403.354ms 424.676ms
> mlx4_7 403.357ms 424.675ms
> mlx4_8 403.355ms 424.675ms
>
> ibv_dereg_mr:
> Mod. Org.
> mlx4_1 116.408ms 142.818ms
> mlx4_2 116.434ms 142.793ms
> mlx4_3 116.488ms 143.247ms
> mlx4_4 116.679ms 143.230ms
> mlx4_5 112.017ms 107.204ms
> mlx4_6 112.032ms 107.516ms
> mlx4_7 112.083ms 184.195ms
> mlx4_8 115.089ms 190.618ms
>
> Suggested-by: Håkon Bugge <haakon.bugge@oracle.com>
> Signed-off-by: Hans Westgaard Ry <hans.westgaard.ry@oracle.com>
> ---
> drivers/net/ethernet/mellanox/mlx4/cmd.c | 75 +++++++++++++++++--------------
> drivers/net/ethernet/mellanox/mlx4/mlx4.h | 1 +
> 2 files changed, 43 insertions(+), 33 deletions(-)
>
> diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c
> index c678344d22a2..24989e96ab9d 100644
> --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c
> +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c
> @@ -51,7 +51,6 @@
> #include "fw.h"
> #include "fw_qos.h"
> #include "mlx4_stats.h"
> -
> #define CMD_POLL_TOKEN 0xffff
> #define INBOX_MASK 0xffffffffffffff00ULL
>
> @@ -2241,48 +2240,58 @@ void mlx4_master_comm_channel(struct work_struct *work)
> struct mlx4_priv *priv =
> container_of(mfunc, struct mlx4_priv, mfunc);
> struct mlx4_dev *dev = &priv->dev;
> - __be32 *bit_vec;
> u32 comm_cmd;
> - u32 vec;
> - int i, j, slave;
> + int i, slave;
> int toggle;
> int served = 0;
> int reported = 0;
> u32 slt;
> -
> - bit_vec = master->comm_arm_bit_vector;
> - for (i = 0; i < COMM_CHANNEL_BIT_ARRAY_SIZE; i++) {
> - vec = be32_to_cpu(bit_vec[i]);
> - for (j = 0; j < 32; j++) {
> - if (!(vec & (1 << j)))
> - continue;
> - ++reported;
> - slave = (i * 32) + j;
> - comm_cmd = swab32(readl(
> - &mfunc->comm[slave].slave_write));
> - slt = swab32(readl(&mfunc->comm[slave].slave_read))
> - >> 31;
> - toggle = comm_cmd >> 31;
> - if (toggle != slt) {
> - if (master->slave_state[slave].comm_toggle
> - != slt) {
> - pr_info("slave %d out of sync. read toggle %d, state toggle %d. Resynching.\n",
> - slave, slt,
> - master->slave_state[slave].comm_toggle);
> - master->slave_state[slave].comm_toggle =
> - slt;
> - }
> - mlx4_master_do_cmd(dev, slave,
> - comm_cmd >> 16 & 0xff,
> - comm_cmd & 0xffff, toggle);
> - ++served;
> + u32 lbit_vec[COMM_CHANNEL_BIT_ARRAY_SIZE];
> + u32 nmbr_bits;
> + u32 prev_slave;
> + bool first = true;
> +
> + for (i = 0; i < COMM_CHANNEL_BIT_ARRAY_SIZE; i++)
> + lbit_vec[i] = be32_to_cpu(master->comm_arm_bit_vector[i]);
> + nmbr_bits = dev->persist->num_vfs + 1;
> + if (++priv->next_slave >= nmbr_bits)
> + priv->next_slave = 0;
> + slave = priv->next_slave;
> + while (true) {
> + slave = find_next_bit((const unsigned long *)&lbit_vec, nmbr_bits, slave);
> + if (!first && slave >= priv->next_slave) {
> + break;
> + } else if (slave == nmbr_bits) {
> + if (!first)
> + break;
> + first = false;
> + slave = 0;
> + continue;
> + }
> + ++reported;
> + comm_cmd = swab32(readl(&mfunc->comm[slave].slave_write));
> + slt = swab32(readl(&mfunc->comm[slave].slave_read)) >> 31;
> + toggle = comm_cmd >> 31;
> + if (toggle != slt) {
> + if (master->slave_state[slave].comm_toggle
> + != slt) {
> + pr_info("slave %d out of sync. read toggle %d, state toggle %d. Resynching.\n",
> + slave, slt,
> + master->slave_state[slave].comm_toggle);
> + master->slave_state[slave].comm_toggle =
> + slt;
> }
> + mlx4_master_do_cmd(dev, slave,
> + comm_cmd >> 16 & 0xff,
> + comm_cmd & 0xffff, toggle);
> + ++served;
> }
> + prev_slave = slave++;
> }
>
> if (reported && reported != served)
> - mlx4_warn(dev, "Got command event with bitmask from %d slaves but %d were served\n",
> - reported, served);
> + mlx4_warn(dev, "Got command event with bitmask from %d slaves but %d were served %x %d\n",
> + reported, served, lbit_vec[0], priv->next_slave);
>
> if (mlx4_ARM_COMM_CHANNEL(dev))
> mlx4_warn(dev, "Failed to arm comm channel events\n");
> diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
> index 64bed7ac3836..cd6ba80f4c90 100644
> --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h
> +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
> @@ -924,6 +924,7 @@ struct mlx4_priv {
>
> atomic_t opreq_count;
> struct work_struct opreq_task;
> + u32 next_slave; /* mlx4_master_comm_channel */
> };
>
> static inline struct mlx4_priv *mlx4_priv(struct mlx4_dev *dev)
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH] net/mlx4: Treat VFs fair when handling comm_channel_events
2021-04-15 11:43 [PATCH] net/mlx4: Treat VFs fair when handling comm_channel_events Hans Westgaard Ry
2021-04-15 14:02 ` Hans Westgaard Ry
@ 2021-04-15 14:22 ` kernel test robot
2021-04-18 11:30 ` Leon Romanovsky
2021-04-18 13:17 ` Tariq Toukan
3 siblings, 0 replies; 5+ messages in thread
From: kernel test robot @ 2021-04-15 14:22 UTC (permalink / raw)
To: Hans Westgaard Ry, linux-rdma; +Cc: kbuild-all, Jack Morgenstein
[-- Attachment #1: Type: text/plain, Size: 4392 bytes --]
Hi Hans,
Thank you for the patch! Perhaps something to improve:
[auto build test WARNING on rdma/for-next]
[also build test WARNING on linus/master v5.12-rc7 next-20210414]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]
url: https://github.com/0day-ci/linux/commits/Hans-Westgaard-Ry/net-mlx4-Treat-VFs-fair-when-handling-comm_channel_events/20210415-194619
base: https://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma.git for-next
config: arc-allyesconfig (attached as .config)
compiler: arceb-elf-gcc (GCC) 9.3.0
reproduce (this is a W=1 build):
wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# https://github.com/0day-ci/linux/commit/9de686423c117ccefb2c09e57ab16f84f0434c68
git remote add linux-review https://github.com/0day-ci/linux
git fetch --no-tags linux-review Hans-Westgaard-Ry/net-mlx4-Treat-VFs-fair-when-handling-comm_channel_events/20210415-194619
git checkout 9de686423c117ccefb2c09e57ab16f84f0434c68
# save the attached .config to linux build tree
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross W=1 ARCH=arc
If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>
All warnings (new ones prefixed by >>):
drivers/net/ethernet/mellanox/mlx4/cmd.c: In function 'mlx4_master_comm_channel':
>> drivers/net/ethernet/mellanox/mlx4/cmd.c:2251:6: warning: variable 'prev_slave' set but not used [-Wunused-but-set-variable]
2251 | u32 prev_slave;
| ^~~~~~~~~~
vim +/prev_slave +2251 drivers/net/ethernet/mellanox/mlx4/cmd.c
2230
2231 /* master command processing */
2232 void mlx4_master_comm_channel(struct work_struct *work)
2233 {
2234 struct mlx4_mfunc_master_ctx *master =
2235 container_of(work,
2236 struct mlx4_mfunc_master_ctx,
2237 comm_work);
2238 struct mlx4_mfunc *mfunc =
2239 container_of(master, struct mlx4_mfunc, master);
2240 struct mlx4_priv *priv =
2241 container_of(mfunc, struct mlx4_priv, mfunc);
2242 struct mlx4_dev *dev = &priv->dev;
2243 u32 comm_cmd;
2244 int i, slave;
2245 int toggle;
2246 int served = 0;
2247 int reported = 0;
2248 u32 slt;
2249 u32 lbit_vec[COMM_CHANNEL_BIT_ARRAY_SIZE];
2250 u32 nmbr_bits;
> 2251 u32 prev_slave;
2252 bool first = true;
2253
2254 for (i = 0; i < COMM_CHANNEL_BIT_ARRAY_SIZE; i++)
2255 lbit_vec[i] = be32_to_cpu(master->comm_arm_bit_vector[i]);
2256 nmbr_bits = dev->persist->num_vfs + 1;
2257 if (++priv->next_slave >= nmbr_bits)
2258 priv->next_slave = 0;
2259 slave = priv->next_slave;
2260 while (true) {
2261 slave = find_next_bit((const unsigned long *)&lbit_vec, nmbr_bits, slave);
2262 if (!first && slave >= priv->next_slave) {
2263 break;
2264 } else if (slave == nmbr_bits) {
2265 if (!first)
2266 break;
2267 first = false;
2268 slave = 0;
2269 continue;
2270 }
2271 ++reported;
2272 comm_cmd = swab32(readl(&mfunc->comm[slave].slave_write));
2273 slt = swab32(readl(&mfunc->comm[slave].slave_read)) >> 31;
2274 toggle = comm_cmd >> 31;
2275 if (toggle != slt) {
2276 if (master->slave_state[slave].comm_toggle
2277 != slt) {
2278 pr_info("slave %d out of sync. read toggle %d, state toggle %d. Resynching.\n",
2279 slave, slt,
2280 master->slave_state[slave].comm_toggle);
2281 master->slave_state[slave].comm_toggle =
2282 slt;
2283 }
2284 mlx4_master_do_cmd(dev, slave,
2285 comm_cmd >> 16 & 0xff,
2286 comm_cmd & 0xffff, toggle);
2287 ++served;
2288 }
2289 prev_slave = slave++;
2290 }
2291
2292 if (reported && reported != served)
2293 mlx4_warn(dev, "Got command event with bitmask from %d slaves but %d were served %x %d\n",
2294 reported, served, lbit_vec[0], priv->next_slave);
2295
2296 if (mlx4_ARM_COMM_CHANNEL(dev))
2297 mlx4_warn(dev, "Failed to arm comm channel events\n");
2298 }
2299
---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 67446 bytes --]
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH] net/mlx4: Treat VFs fair when handling comm_channel_events
2021-04-15 11:43 [PATCH] net/mlx4: Treat VFs fair when handling comm_channel_events Hans Westgaard Ry
2021-04-15 14:02 ` Hans Westgaard Ry
2021-04-15 14:22 ` kernel test robot
@ 2021-04-18 11:30 ` Leon Romanovsky
2021-04-18 13:17 ` Tariq Toukan
3 siblings, 0 replies; 5+ messages in thread
From: Leon Romanovsky @ 2021-04-18 11:30 UTC (permalink / raw)
To: Hans Westgaard Ry; +Cc: linux-rdma, Jack Morgenstein, Tariq Toukan
On Thu, Apr 15, 2021 at 01:43:42PM +0200, Hans Westgaard Ry wrote:
> Handling comm_channel_event in mlx4_master_comm_channel uses a double
> loop to determine which slaves have requested work. The search is
> always started at lowest slave. This leads to unfairness; lower VFs
> tends to be prioritized over higher VFs.
>
> The patch uses find_next_bit to determine which slaves to handle.
> Fairness is implemented by always starting at the next to the last
> start.
>
> An MPI program has been used to measure improvements. It runs 500
> ibv_reg_mr, synchronizes with all other instances and then runs 500
> ibv_dereg_mr.
>
> The results running 500 processes, time reported is for running 500
> calls:
>
> ibv_reg_mr:
> Mod. Org.
> mlx4_1 403.356ms 424.674ms
> mlx4_2 403.355ms 424.674ms
> mlx4_3 403.354ms 424.674ms
> mlx4_4 403.355ms 424.674ms
> mlx4_5 403.357ms 424.677ms
> mlx4_6 403.354ms 424.676ms
> mlx4_7 403.357ms 424.675ms
> mlx4_8 403.355ms 424.675ms
>
> ibv_dereg_mr:
> Mod. Org.
> mlx4_1 116.408ms 142.818ms
> mlx4_2 116.434ms 142.793ms
> mlx4_3 116.488ms 143.247ms
> mlx4_4 116.679ms 143.230ms
> mlx4_5 112.017ms 107.204ms
> mlx4_6 112.032ms 107.516ms
> mlx4_7 112.083ms 184.195ms
> mlx4_8 115.089ms 190.618ms
>
> Suggested-by: Håkon Bugge <haakon.bugge@oracle.com>
> Signed-off-by: Hans Westgaard Ry <hans.westgaard.ry@oracle.com>
> ---
> drivers/net/ethernet/mellanox/mlx4/cmd.c | 75 +++++++++++++++++--------------
> drivers/net/ethernet/mellanox/mlx4/mlx4.h | 1 +
> 2 files changed, 43 insertions(+), 33 deletions(-)
Please fix kbuild error and resubmit to the netdev ML with Tariq CCed.
Thanks
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH] net/mlx4: Treat VFs fair when handling comm_channel_events
2021-04-15 11:43 [PATCH] net/mlx4: Treat VFs fair when handling comm_channel_events Hans Westgaard Ry
` (2 preceding siblings ...)
2021-04-18 11:30 ` Leon Romanovsky
@ 2021-04-18 13:17 ` Tariq Toukan
3 siblings, 0 replies; 5+ messages in thread
From: Tariq Toukan @ 2021-04-18 13:17 UTC (permalink / raw)
To: Hans Westgaard Ry, linux-rdma; +Cc: Jack Morgenstein
On 4/15/2021 2:43 PM, Hans Westgaard Ry wrote:
> Handling comm_channel_event in mlx4_master_comm_channel uses a double
> loop to determine which slaves have requested work. The search is
> always started at lowest slave. This leads to unfairness; lower VFs
> tends to be prioritized over higher VFs.
>
> The patch uses find_next_bit to determine which slaves to handle.
> Fairness is implemented by always starting at the next to the last
> start.
>
> An MPI program has been used to measure improvements. It runs 500
> ibv_reg_mr, synchronizes with all other instances and then runs 500
> ibv_dereg_mr.
>
> The results running 500 processes, time reported is for running 500
> calls:
>
> ibv_reg_mr:
> Mod. Org.
> mlx4_1 403.356ms 424.674ms
> mlx4_2 403.355ms 424.674ms
> mlx4_3 403.354ms 424.674ms
> mlx4_4 403.355ms 424.674ms
> mlx4_5 403.357ms 424.677ms
> mlx4_6 403.354ms 424.676ms
> mlx4_7 403.357ms 424.675ms
> mlx4_8 403.355ms 424.675ms
>
> ibv_dereg_mr:
> Mod. Org.
> mlx4_1 116.408ms 142.818ms
> mlx4_2 116.434ms 142.793ms
> mlx4_3 116.488ms 143.247ms
> mlx4_4 116.679ms 143.230ms
> mlx4_5 112.017ms 107.204ms
> mlx4_6 112.032ms 107.516ms
> mlx4_7 112.083ms 184.195ms
> mlx4_8 115.089ms 190.618ms
>
> Suggested-by: Håkon Bugge <haakon.bugge@oracle.com>
> Signed-off-by: Hans Westgaard Ry <hans.westgaard.ry@oracle.com>
> ---
> drivers/net/ethernet/mellanox/mlx4/cmd.c | 75 +++++++++++++++++--------------
> drivers/net/ethernet/mellanox/mlx4/mlx4.h | 1 +
> 2 files changed, 43 insertions(+), 33 deletions(-)
>
> diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c
> index c678344d22a2..24989e96ab9d 100644
> --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c
> +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c
> @@ -51,7 +51,6 @@
> #include "fw.h"
> #include "fw_qos.h"
> #include "mlx4_stats.h"
> -
> #define CMD_POLL_TOKEN 0xffff
> #define INBOX_MASK 0xffffffffffffff00ULL
>
Unrelated change. Please revert it.
> @@ -2241,48 +2240,58 @@ void mlx4_master_comm_channel(struct work_struct *work)
> struct mlx4_priv *priv =
> container_of(mfunc, struct mlx4_priv, mfunc);
> struct mlx4_dev *dev = &priv->dev;
> - __be32 *bit_vec;
> u32 comm_cmd;
> - u32 vec;
> - int i, j, slave;
> + int i, slave;
> int toggle;
> int served = 0;
> int reported = 0;
> u32 slt;
> -
> - bit_vec = master->comm_arm_bit_vector;
> - for (i = 0; i < COMM_CHANNEL_BIT_ARRAY_SIZE; i++) {
> - vec = be32_to_cpu(bit_vec[i]);
> - for (j = 0; j < 32; j++) {
> - if (!(vec & (1 << j)))
> - continue;
> - ++reported;
> - slave = (i * 32) + j;
> - comm_cmd = swab32(readl(
> - &mfunc->comm[slave].slave_write));
> - slt = swab32(readl(&mfunc->comm[slave].slave_read))
> - >> 31;
> - toggle = comm_cmd >> 31;
> - if (toggle != slt) {
> - if (master->slave_state[slave].comm_toggle
> - != slt) {
> - pr_info("slave %d out of sync. read toggle %d, state toggle %d. Resynching.\n",
> - slave, slt,
> - master->slave_state[slave].comm_toggle);
> - master->slave_state[slave].comm_toggle =
> - slt;
> - }
> - mlx4_master_do_cmd(dev, slave,
> - comm_cmd >> 16 & 0xff,
> - comm_cmd & 0xffff, toggle);
> - ++served;
> + u32 lbit_vec[COMM_CHANNEL_BIT_ARRAY_SIZE];
> + u32 nmbr_bits;
> + u32 prev_slave;
> + bool first = true;
Please maintain reversed Christmas tree when possible.
Split declaration from init if needed.
> +
> + for (i = 0; i < COMM_CHANNEL_BIT_ARRAY_SIZE; i++)
> + lbit_vec[i] = be32_to_cpu(master->comm_arm_bit_vector[i]);
> + nmbr_bits = dev->persist->num_vfs + 1;
> + if (++priv->next_slave >= nmbr_bits)
> + priv->next_slave = 0;
> + slave = priv->next_slave;
> + while (true) {
> + slave = find_next_bit((const unsigned long *)&lbit_vec, nmbr_bits, slave);
> + if (!first && slave >= priv->next_slave) {
> + break;
> + } else if (slave == nmbr_bits) {
Unnecessary else, as if block breaks.
> + if (!first)
> + break;
> + first = false;
> + slave = 0;
> + continue;
> + }
> + ++reported;
> + comm_cmd = swab32(readl(&mfunc->comm[slave].slave_write));
> + slt = swab32(readl(&mfunc->comm[slave].slave_read)) >> 31;
> + toggle = comm_cmd >> 31;
> + if (toggle != slt) {
> + if (master->slave_state[slave].comm_toggle
> + != slt) {
> + pr_info("slave %d out of sync. read toggle %d, state toggle %d. Resynching.\n",
> + slave, slt,
> + master->slave_state[slave].comm_toggle);
> + master->slave_state[slave].comm_toggle =
> + slt;
> }
> + mlx4_master_do_cmd(dev, slave,
> + comm_cmd >> 16 & 0xff,
> + comm_cmd & 0xffff, toggle);
> + ++served;
> }
> + prev_slave = slave++;
> }
>
> if (reported && reported != served)
> - mlx4_warn(dev, "Got command event with bitmask from %d slaves but %d were served\n",
> - reported, served);
> + mlx4_warn(dev, "Got command event with bitmask from %d slaves but %d were served %x %d\n",
Not clear from the string what these newly printed values are.
Improve the string/description.
> + reported, served, lbit_vec[0], priv->next_slave);
>
> if (mlx4_ARM_COMM_CHANNEL(dev))
> mlx4_warn(dev, "Failed to arm comm channel events\n");
> diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
> index 64bed7ac3836..cd6ba80f4c90 100644
> --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h
> +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
> @@ -924,6 +924,7 @@ struct mlx4_priv {
>
> atomic_t opreq_count;
> struct work_struct opreq_task;
> + u32 next_slave; /* mlx4_master_comm_channel */
Move it to a more specific struct, consider struct mlx4_mfunc_master_ctx.
> };
>
> static inline struct mlx4_priv *mlx4_priv(struct mlx4_dev *dev)
>
Please submit to netdev mailing list, and CC needed maintainers.
^ permalink raw reply [flat|nested] 5+ messages in thread