* [PATCH] vdpa/mlx5: Fix firmware error on creation of 1k VQs
@ 2023-08-29 17:41 ` Dragos Tatulea via Virtualization
0 siblings, 0 replies; 4+ messages in thread
From: Dragos Tatulea @ 2023-08-29 17:41 UTC (permalink / raw)
To: Michael S. Tsirkin, Jason Wang, Xuan Zhuo, Parav Pandit
Cc: Dragos Tatulea, Saeed Mahameed, virtualization, linux-kernel
A firmware error is triggered when configuring a 9k MTU on the PF after
switching to switchdev mode and then using a vdpa device with larger
(1k) rings:
mlx5_cmd_out_err: CREATE_GENERAL_OBJECT(0xa00) op_mod(0xd) failed, status bad resource(0x5), syndrome (0xf6db90), err(-22)
This is due to the fact that the hw VQ size parameters are computed
based on the umem_1/2/3_buffer_param_a/b capabilities and all
device capabilities are read only when the driver is moved to switchdev mode.
The problematic configuration flow looks like this:
1) Create VF
2) Unbind VF
3) Switch PF to switchdev mode.
4) Bind VF
5) Set PF MTU to 9k
6) create vDPA device
7) Start VM with vDPA device and 1K queue size
Note that setting the MTU before step 3) doesn't trigger this issue.
This patch reads the forementioned umem parameters at the latest point
possible before the VQs of the device are created.
Fixes: 1a86b377aa21 ("vdpa/mlx5: Add VDPA driver for supported mlx5 devices")
Signed-off-by: Dragos Tatulea <dtatulea@nvidia.com>
Reviewed-by: Saeed Mahameed <saeedm@nvidia.com>
---
drivers/vdpa/mlx5/net/mlx5_vnet.c | 55 ++++++++++++++++++++++++++-----
drivers/vdpa/mlx5/net/mlx5_vnet.h | 9 +++++
2 files changed, 55 insertions(+), 9 deletions(-)
diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
index 37be945a0230..85855680b24c 100644
--- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
+++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
@@ -625,30 +625,62 @@ static void cq_destroy(struct mlx5_vdpa_net *ndev, u16 idx)
mlx5_db_free(ndev->mvdev.mdev, &vcq->db);
}
+static int read_umem_params(struct mlx5_vdpa_net *ndev)
+{
+ u32 out[MLX5_ST_SZ_DW(query_hca_cap_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(query_hca_cap_in)] = {};
+ u16 opmod = (MLX5_CAP_VDPA_EMULATION << 1) | (HCA_CAP_OPMOD_GET_CUR & 0x01);
+ struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
+ void *caps;
+ int err;
+
+ MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP);
+ MLX5_SET(query_hca_cap_in, in, op_mod, opmod);
+ err = mlx5_cmd_exec_inout(mdev, query_hca_cap, in, out);
+ if (err) {
+ mlx5_vdpa_warn(&ndev->mvdev,
+ "Failed reading vdpa umem capabilities with err %d\n", err);
+ return err;
+ }
+
+ caps = MLX5_ADDR_OF(query_hca_cap_out, out, capability);
+
+ ndev->umem_1_buffer_param_a = MLX5_GET(virtio_emulation_cap, caps, umem_1_buffer_param_a);
+ ndev->umem_1_buffer_param_b = MLX5_GET(virtio_emulation_cap, caps, umem_1_buffer_param_b);
+
+ ndev->umem_2_buffer_param_a = MLX5_GET(virtio_emulation_cap, caps, umem_2_buffer_param_a);
+ ndev->umem_2_buffer_param_b = MLX5_GET(virtio_emulation_cap, caps, umem_2_buffer_param_b);
+
+ ndev->umem_3_buffer_param_a = MLX5_GET(virtio_emulation_cap, caps, umem_3_buffer_param_a);
+ ndev->umem_3_buffer_param_b = MLX5_GET(virtio_emulation_cap, caps, umem_3_buffer_param_b);
+
+ return 0;
+}
+
static void set_umem_size(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num,
struct mlx5_vdpa_umem **umemp)
{
- struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
- int p_a;
- int p_b;
+ u32 p_a;
+ u32 p_b;
switch (num) {
case 1:
- p_a = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_1_buffer_param_a);
- p_b = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_1_buffer_param_b);
+ p_a = ndev->umem_1_buffer_param_a;
+ p_b = ndev->umem_1_buffer_param_b;
*umemp = &mvq->umem1;
break;
case 2:
- p_a = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_2_buffer_param_a);
- p_b = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_2_buffer_param_b);
+ p_a = ndev->umem_2_buffer_param_a;
+ p_b = ndev->umem_2_buffer_param_b;
*umemp = &mvq->umem2;
break;
case 3:
- p_a = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_3_buffer_param_a);
- p_b = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_3_buffer_param_b);
+ p_a = ndev->umem_3_buffer_param_a;
+ p_b = ndev->umem_3_buffer_param_b;
*umemp = &mvq->umem3;
break;
}
+
(*umemp)->size = p_a * mvq->num_ent + p_b;
}
@@ -2679,6 +2711,11 @@ static int setup_driver(struct mlx5_vdpa_dev *mvdev)
goto out;
}
mlx5_vdpa_add_debugfs(ndev);
+
+ err = read_umem_params(ndev);
+ if (err)
+ goto err_setup;
+
err = setup_virtqueues(mvdev);
if (err) {
mlx5_vdpa_warn(mvdev, "setup_virtqueues\n");
diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.h b/drivers/vdpa/mlx5/net/mlx5_vnet.h
index 36c44d9fdd16..65ebbba20662 100644
--- a/drivers/vdpa/mlx5/net/mlx5_vnet.h
+++ b/drivers/vdpa/mlx5/net/mlx5_vnet.h
@@ -65,6 +65,15 @@ struct mlx5_vdpa_net {
struct hlist_head macvlan_hash[MLX5V_MACVLAN_SIZE];
struct mlx5_vdpa_irq_pool irqp;
struct dentry *debugfs;
+
+ u32 umem_1_buffer_param_a;
+ u32 umem_1_buffer_param_b;
+
+ u32 umem_2_buffer_param_a;
+ u32 umem_2_buffer_param_b;
+
+ u32 umem_3_buffer_param_a;
+ u32 umem_3_buffer_param_b;
};
struct mlx5_vdpa_counter {
--
2.41.0
^ permalink raw reply related [flat|nested] 4+ messages in thread
* [PATCH] vdpa/mlx5: Fix firmware error on creation of 1k VQs
@ 2023-08-29 17:41 ` Dragos Tatulea via Virtualization
0 siblings, 0 replies; 4+ messages in thread
From: Dragos Tatulea via Virtualization @ 2023-08-29 17:41 UTC (permalink / raw)
To: Michael S. Tsirkin, Jason Wang, Xuan Zhuo, Parav Pandit
Cc: linux-kernel, Saeed Mahameed, virtualization
A firmware error is triggered when configuring a 9k MTU on the PF after
switching to switchdev mode and then using a vdpa device with larger
(1k) rings:
mlx5_cmd_out_err: CREATE_GENERAL_OBJECT(0xa00) op_mod(0xd) failed, status bad resource(0x5), syndrome (0xf6db90), err(-22)
This is due to the fact that the hw VQ size parameters are computed
based on the umem_1/2/3_buffer_param_a/b capabilities and all
device capabilities are read only when the driver is moved to switchdev mode.
The problematic configuration flow looks like this:
1) Create VF
2) Unbind VF
3) Switch PF to switchdev mode.
4) Bind VF
5) Set PF MTU to 9k
6) create vDPA device
7) Start VM with vDPA device and 1K queue size
Note that setting the MTU before step 3) doesn't trigger this issue.
This patch reads the forementioned umem parameters at the latest point
possible before the VQs of the device are created.
Fixes: 1a86b377aa21 ("vdpa/mlx5: Add VDPA driver for supported mlx5 devices")
Signed-off-by: Dragos Tatulea <dtatulea@nvidia.com>
Reviewed-by: Saeed Mahameed <saeedm@nvidia.com>
---
drivers/vdpa/mlx5/net/mlx5_vnet.c | 55 ++++++++++++++++++++++++++-----
drivers/vdpa/mlx5/net/mlx5_vnet.h | 9 +++++
2 files changed, 55 insertions(+), 9 deletions(-)
diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
index 37be945a0230..85855680b24c 100644
--- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
+++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
@@ -625,30 +625,62 @@ static void cq_destroy(struct mlx5_vdpa_net *ndev, u16 idx)
mlx5_db_free(ndev->mvdev.mdev, &vcq->db);
}
+static int read_umem_params(struct mlx5_vdpa_net *ndev)
+{
+ u32 out[MLX5_ST_SZ_DW(query_hca_cap_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(query_hca_cap_in)] = {};
+ u16 opmod = (MLX5_CAP_VDPA_EMULATION << 1) | (HCA_CAP_OPMOD_GET_CUR & 0x01);
+ struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
+ void *caps;
+ int err;
+
+ MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP);
+ MLX5_SET(query_hca_cap_in, in, op_mod, opmod);
+ err = mlx5_cmd_exec_inout(mdev, query_hca_cap, in, out);
+ if (err) {
+ mlx5_vdpa_warn(&ndev->mvdev,
+ "Failed reading vdpa umem capabilities with err %d\n", err);
+ return err;
+ }
+
+ caps = MLX5_ADDR_OF(query_hca_cap_out, out, capability);
+
+ ndev->umem_1_buffer_param_a = MLX5_GET(virtio_emulation_cap, caps, umem_1_buffer_param_a);
+ ndev->umem_1_buffer_param_b = MLX5_GET(virtio_emulation_cap, caps, umem_1_buffer_param_b);
+
+ ndev->umem_2_buffer_param_a = MLX5_GET(virtio_emulation_cap, caps, umem_2_buffer_param_a);
+ ndev->umem_2_buffer_param_b = MLX5_GET(virtio_emulation_cap, caps, umem_2_buffer_param_b);
+
+ ndev->umem_3_buffer_param_a = MLX5_GET(virtio_emulation_cap, caps, umem_3_buffer_param_a);
+ ndev->umem_3_buffer_param_b = MLX5_GET(virtio_emulation_cap, caps, umem_3_buffer_param_b);
+
+ return 0;
+}
+
static void set_umem_size(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num,
struct mlx5_vdpa_umem **umemp)
{
- struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
- int p_a;
- int p_b;
+ u32 p_a;
+ u32 p_b;
switch (num) {
case 1:
- p_a = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_1_buffer_param_a);
- p_b = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_1_buffer_param_b);
+ p_a = ndev->umem_1_buffer_param_a;
+ p_b = ndev->umem_1_buffer_param_b;
*umemp = &mvq->umem1;
break;
case 2:
- p_a = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_2_buffer_param_a);
- p_b = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_2_buffer_param_b);
+ p_a = ndev->umem_2_buffer_param_a;
+ p_b = ndev->umem_2_buffer_param_b;
*umemp = &mvq->umem2;
break;
case 3:
- p_a = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_3_buffer_param_a);
- p_b = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_3_buffer_param_b);
+ p_a = ndev->umem_3_buffer_param_a;
+ p_b = ndev->umem_3_buffer_param_b;
*umemp = &mvq->umem3;
break;
}
+
(*umemp)->size = p_a * mvq->num_ent + p_b;
}
@@ -2679,6 +2711,11 @@ static int setup_driver(struct mlx5_vdpa_dev *mvdev)
goto out;
}
mlx5_vdpa_add_debugfs(ndev);
+
+ err = read_umem_params(ndev);
+ if (err)
+ goto err_setup;
+
err = setup_virtqueues(mvdev);
if (err) {
mlx5_vdpa_warn(mvdev, "setup_virtqueues\n");
diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.h b/drivers/vdpa/mlx5/net/mlx5_vnet.h
index 36c44d9fdd16..65ebbba20662 100644
--- a/drivers/vdpa/mlx5/net/mlx5_vnet.h
+++ b/drivers/vdpa/mlx5/net/mlx5_vnet.h
@@ -65,6 +65,15 @@ struct mlx5_vdpa_net {
struct hlist_head macvlan_hash[MLX5V_MACVLAN_SIZE];
struct mlx5_vdpa_irq_pool irqp;
struct dentry *debugfs;
+
+ u32 umem_1_buffer_param_a;
+ u32 umem_1_buffer_param_b;
+
+ u32 umem_2_buffer_param_a;
+ u32 umem_2_buffer_param_b;
+
+ u32 umem_3_buffer_param_a;
+ u32 umem_3_buffer_param_b;
};
struct mlx5_vdpa_counter {
--
2.41.0
_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization
^ permalink raw reply related [flat|nested] 4+ messages in thread
* Re: [PATCH] vdpa/mlx5: Fix firmware error on creation of 1k VQs
2023-08-29 17:41 ` Dragos Tatulea via Virtualization
@ 2023-08-29 19:02 ` kernel test robot
-1 siblings, 0 replies; 4+ messages in thread
From: kernel test robot @ 2023-08-29 19:02 UTC (permalink / raw)
To: Dragos Tatulea, Michael S. Tsirkin, Jason Wang, Xuan Zhuo, Parav Pandit
Cc: linux-kernel, virtualization, Saeed Mahameed, oe-kbuild-all
Hi Dragos,
kernel test robot noticed the following build warnings:
[auto build test WARNING on linus/master]
[also build test WARNING on v6.5 next-20230829]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]
url: https://github.com/intel-lab-lkp/linux/commits/Dragos-Tatulea/vdpa-mlx5-Fix-firmware-error-on-creation-of-1k-VQs/20230830-014600
base: linus/master
patch link: https://lore.kernel.org/r/20230829174219.928343-1-dtatulea%40nvidia.com
patch subject: [PATCH] vdpa/mlx5: Fix firmware error on creation of 1k VQs
config: sparc-allyesconfig (https://download.01.org/0day-ci/archive/20230830/202308300241.q7t7Ouf3-lkp@intel.com/config)
compiler: sparc64-linux-gcc (GCC) 13.2.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20230830/202308300241.q7t7Ouf3-lkp@intel.com/reproduce)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202308300241.q7t7Ouf3-lkp@intel.com/
All warnings (new ones prefixed by >>):
drivers/vdpa/mlx5/net/mlx5_vnet.c: In function 'read_umem_params':
>> drivers/vdpa/mlx5/net/mlx5_vnet.c:658:1: warning: the frame size of 4128 bytes is larger than 2048 bytes [-Wframe-larger-than=]
658 | }
| ^
vim +658 drivers/vdpa/mlx5/net/mlx5_vnet.c
627
628 static int read_umem_params(struct mlx5_vdpa_net *ndev)
629 {
630 u32 out[MLX5_ST_SZ_DW(query_hca_cap_out)] = {};
631 u32 in[MLX5_ST_SZ_DW(query_hca_cap_in)] = {};
632 u16 opmod = (MLX5_CAP_VDPA_EMULATION << 1) | (HCA_CAP_OPMOD_GET_CUR & 0x01);
633 struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
634 void *caps;
635 int err;
636
637 MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP);
638 MLX5_SET(query_hca_cap_in, in, op_mod, opmod);
639 err = mlx5_cmd_exec_inout(mdev, query_hca_cap, in, out);
640 if (err) {
641 mlx5_vdpa_warn(&ndev->mvdev,
642 "Failed reading vdpa umem capabilities with err %d\n", err);
643 return err;
644 }
645
646 caps = MLX5_ADDR_OF(query_hca_cap_out, out, capability);
647
648 ndev->umem_1_buffer_param_a = MLX5_GET(virtio_emulation_cap, caps, umem_1_buffer_param_a);
649 ndev->umem_1_buffer_param_b = MLX5_GET(virtio_emulation_cap, caps, umem_1_buffer_param_b);
650
651 ndev->umem_2_buffer_param_a = MLX5_GET(virtio_emulation_cap, caps, umem_2_buffer_param_a);
652 ndev->umem_2_buffer_param_b = MLX5_GET(virtio_emulation_cap, caps, umem_2_buffer_param_b);
653
654 ndev->umem_3_buffer_param_a = MLX5_GET(virtio_emulation_cap, caps, umem_3_buffer_param_a);
655 ndev->umem_3_buffer_param_b = MLX5_GET(virtio_emulation_cap, caps, umem_3_buffer_param_b);
656
657 return 0;
> 658 }
659
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH] vdpa/mlx5: Fix firmware error on creation of 1k VQs
@ 2023-08-29 19:02 ` kernel test robot
0 siblings, 0 replies; 4+ messages in thread
From: kernel test robot @ 2023-08-29 19:02 UTC (permalink / raw)
To: Dragos Tatulea, Michael S. Tsirkin, Jason Wang, Xuan Zhuo, Parav Pandit
Cc: oe-kbuild-all, Dragos Tatulea, Saeed Mahameed, virtualization,
linux-kernel
Hi Dragos,
kernel test robot noticed the following build warnings:
[auto build test WARNING on linus/master]
[also build test WARNING on v6.5 next-20230829]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]
url: https://github.com/intel-lab-lkp/linux/commits/Dragos-Tatulea/vdpa-mlx5-Fix-firmware-error-on-creation-of-1k-VQs/20230830-014600
base: linus/master
patch link: https://lore.kernel.org/r/20230829174219.928343-1-dtatulea%40nvidia.com
patch subject: [PATCH] vdpa/mlx5: Fix firmware error on creation of 1k VQs
config: sparc-allyesconfig (https://download.01.org/0day-ci/archive/20230830/202308300241.q7t7Ouf3-lkp@intel.com/config)
compiler: sparc64-linux-gcc (GCC) 13.2.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20230830/202308300241.q7t7Ouf3-lkp@intel.com/reproduce)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202308300241.q7t7Ouf3-lkp@intel.com/
All warnings (new ones prefixed by >>):
drivers/vdpa/mlx5/net/mlx5_vnet.c: In function 'read_umem_params':
>> drivers/vdpa/mlx5/net/mlx5_vnet.c:658:1: warning: the frame size of 4128 bytes is larger than 2048 bytes [-Wframe-larger-than=]
658 | }
| ^
vim +658 drivers/vdpa/mlx5/net/mlx5_vnet.c
627
628 static int read_umem_params(struct mlx5_vdpa_net *ndev)
629 {
630 u32 out[MLX5_ST_SZ_DW(query_hca_cap_out)] = {};
631 u32 in[MLX5_ST_SZ_DW(query_hca_cap_in)] = {};
632 u16 opmod = (MLX5_CAP_VDPA_EMULATION << 1) | (HCA_CAP_OPMOD_GET_CUR & 0x01);
633 struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
634 void *caps;
635 int err;
636
637 MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP);
638 MLX5_SET(query_hca_cap_in, in, op_mod, opmod);
639 err = mlx5_cmd_exec_inout(mdev, query_hca_cap, in, out);
640 if (err) {
641 mlx5_vdpa_warn(&ndev->mvdev,
642 "Failed reading vdpa umem capabilities with err %d\n", err);
643 return err;
644 }
645
646 caps = MLX5_ADDR_OF(query_hca_cap_out, out, capability);
647
648 ndev->umem_1_buffer_param_a = MLX5_GET(virtio_emulation_cap, caps, umem_1_buffer_param_a);
649 ndev->umem_1_buffer_param_b = MLX5_GET(virtio_emulation_cap, caps, umem_1_buffer_param_b);
650
651 ndev->umem_2_buffer_param_a = MLX5_GET(virtio_emulation_cap, caps, umem_2_buffer_param_a);
652 ndev->umem_2_buffer_param_b = MLX5_GET(virtio_emulation_cap, caps, umem_2_buffer_param_b);
653
654 ndev->umem_3_buffer_param_a = MLX5_GET(virtio_emulation_cap, caps, umem_3_buffer_param_a);
655 ndev->umem_3_buffer_param_b = MLX5_GET(virtio_emulation_cap, caps, umem_3_buffer_param_b);
656
657 return 0;
> 658 }
659
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2023-08-29 19:02 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-08-29 17:41 [PATCH] vdpa/mlx5: Fix firmware error on creation of 1k VQs Dragos Tatulea
2023-08-29 17:41 ` Dragos Tatulea via Virtualization
2023-08-29 19:02 ` kernel test robot
2023-08-29 19:02 ` kernel test robot
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.