All of lore.kernel.org
 help / color / mirror / Atom feed
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
To: linux-kernel@vger.kernel.org
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>,
	stable@vger.kernel.org, Eran Ben Elisha <eranbe@mellanox.com>,
	Moshe Shemesh <moshe@mellanox.com>,
	Saeed Mahameed <saeedm@mellanox.com>,
	Timo Rothenpieler <timo@rothenpieler.org>
Subject: [PATCH 5.4 09/17] net/mlx5: Fix a race when moving command interface to events mode
Date: Fri, 20 Nov 2020 12:03:36 +0100	[thread overview]
Message-ID: <20201120104541.518263886@linuxfoundation.org> (raw)
In-Reply-To: <20201120104541.058449969@linuxfoundation.org>

From: Eran Ben Elisha <eranbe@mellanox.com>

commit d43b7007dbd1195a5b6b83213e49b1516aaf6f5e upstream.

After driver creates (via FW command) an EQ for commands, the driver will
be informed on new commands completion by EQE. However, due to a race in
driver's internal command mode metadata update, some new commands will
still be miss-handled by driver as if we are in polling mode. Such commands
can get two non forced completion, leading to already freed command entry
access.

CREATE_EQ command, that maps EQ to the command queue must be posted to the
command queue while it is empty and no other command should be posted.

Add SW mechanism that once the CREATE_EQ command is about to be executed,
all other commands will return error without being sent to the FW. Allow
sending other commands only after successfully changing the driver's
internal command mode metadata.
We can safely return error to all other commands while creating the command
EQ, as all other commands might be sent from the user/application during
driver load. Application can rerun them later after driver's load was
finished.

Fixes: e126ba97dba9 ("mlx5: Add driver for Mellanox Connect-IB adapters")
Signed-off-by: Eran Ben Elisha <eranbe@mellanox.com>
Signed-off-by: Moshe Shemesh <moshe@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Cc: Timo Rothenpieler <timo@rothenpieler.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

---
 drivers/net/ethernet/mellanox/mlx5/core/cmd.c |   35 +++++++++++++++++++++++---
 drivers/net/ethernet/mellanox/mlx5/core/eq.c  |    3 ++
 include/linux/mlx5/driver.h                   |    6 ++++
 3 files changed, 40 insertions(+), 4 deletions(-)

--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -875,6 +875,14 @@ static void free_msg(struct mlx5_core_de
 static void mlx5_free_cmd_msg(struct mlx5_core_dev *dev,
 			      struct mlx5_cmd_msg *msg);
 
+static bool opcode_allowed(struct mlx5_cmd *cmd, u16 opcode)
+{
+	if (cmd->allowed_opcode == CMD_ALLOWED_OPCODE_ALL)
+		return true;
+
+	return cmd->allowed_opcode == opcode;
+}
+
 static void cmd_work_handler(struct work_struct *work)
 {
 	struct mlx5_cmd_work_ent *ent = container_of(work, struct mlx5_cmd_work_ent, work);
@@ -941,7 +949,8 @@ static void cmd_work_handler(struct work
 
 	/* Skip sending command to fw if internal error */
 	if (pci_channel_offline(dev->pdev) ||
-	    dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
+	    dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR ||
+	    !opcode_allowed(&dev->cmd, ent->op)) {
 		u8 status = 0;
 		u32 drv_synd;
 
@@ -1459,6 +1468,22 @@ static void create_debugfs_files(struct
 	mlx5_cmdif_debugfs_init(dev);
 }
 
+void mlx5_cmd_allowed_opcode(struct mlx5_core_dev *dev, u16 opcode)
+{
+	struct mlx5_cmd *cmd = &dev->cmd;
+	int i;
+
+	for (i = 0; i < cmd->max_reg_cmds; i++)
+		down(&cmd->sem);
+	down(&cmd->pages_sem);
+
+	cmd->allowed_opcode = opcode;
+
+	up(&cmd->pages_sem);
+	for (i = 0; i < cmd->max_reg_cmds; i++)
+		up(&cmd->sem);
+}
+
 static void mlx5_cmd_change_mod(struct mlx5_core_dev *dev, int mode)
 {
 	struct mlx5_cmd *cmd = &dev->cmd;
@@ -1751,12 +1776,13 @@ static int cmd_exec(struct mlx5_core_dev
 	int err;
 	u8 status = 0;
 	u32 drv_synd;
+	u16 opcode;
 	u8 token;
 
+	opcode = MLX5_GET(mbox_in, in, opcode);
 	if (pci_channel_offline(dev->pdev) ||
-	    dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
-		u16 opcode = MLX5_GET(mbox_in, in, opcode);
-
+	    dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR ||
+	    !opcode_allowed(&dev->cmd, opcode)) {
 		err = mlx5_internal_err_ret_value(dev, opcode, &drv_synd, &status);
 		MLX5_SET(mbox_out, out, status, status);
 		MLX5_SET(mbox_out, out, syndrome, drv_synd);
@@ -2058,6 +2084,7 @@ int mlx5_cmd_init(struct mlx5_core_dev *
 	mlx5_core_dbg(dev, "descriptor at dma 0x%llx\n", (unsigned long long)(cmd->dma));
 
 	cmd->mode = CMD_MODE_POLLING;
+	cmd->allowed_opcode = CMD_ALLOWED_OPCODE_ALL;
 
 	create_msg_cache(dev);
 
--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -648,11 +648,13 @@ static int create_async_eqs(struct mlx5_
 		.nent = MLX5_NUM_CMD_EQE,
 		.mask[0] = 1ull << MLX5_EVENT_TYPE_CMD,
 	};
+	mlx5_cmd_allowed_opcode(dev, MLX5_CMD_OP_CREATE_EQ);
 	err = setup_async_eq(dev, &table->cmd_eq, &param, "cmd");
 	if (err)
 		goto err1;
 
 	mlx5_cmd_use_events(dev);
+	mlx5_cmd_allowed_opcode(dev, CMD_ALLOWED_OPCODE_ALL);
 
 	param = (struct mlx5_eq_param) {
 		.irq_index = 0,
@@ -682,6 +684,7 @@ err2:
 	mlx5_cmd_use_polling(dev);
 	cleanup_async_eq(dev, &table->cmd_eq, "cmd");
 err1:
+	mlx5_cmd_allowed_opcode(dev, CMD_ALLOWED_OPCODE_ALL);
 	mlx5_eq_notifier_unregister(dev, &table->cq_err_nb);
 	return err;
 }
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -299,6 +299,7 @@ struct mlx5_cmd {
 	struct semaphore sem;
 	struct semaphore pages_sem;
 	int	mode;
+	u16     allowed_opcode;
 	struct mlx5_cmd_work_ent *ent_arr[MLX5_MAX_COMMANDS];
 	struct dma_pool *pool;
 	struct mlx5_cmd_debug dbg;
@@ -890,10 +891,15 @@ mlx5_frag_buf_get_idx_last_contig_stride
 	return min_t(u32, last_frag_stride_idx - fbc->strides_offset, fbc->sz_m1);
 }
 
+enum {
+	CMD_ALLOWED_OPCODE_ALL,
+};
+
 int mlx5_cmd_init(struct mlx5_core_dev *dev);
 void mlx5_cmd_cleanup(struct mlx5_core_dev *dev);
 void mlx5_cmd_use_events(struct mlx5_core_dev *dev);
 void mlx5_cmd_use_polling(struct mlx5_core_dev *dev);
+void mlx5_cmd_allowed_opcode(struct mlx5_core_dev *dev, u16 opcode);
 
 struct mlx5_async_ctx {
 	struct mlx5_core_dev *dev;



  parent reply	other threads:[~2020-11-20 11:09 UTC|newest]

Thread overview: 22+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-11-20 11:03 [PATCH 5.4 00/17] 5.4.79-rc1 review Greg Kroah-Hartman
2020-11-20 11:03 ` [PATCH 5.4 01/17] selftests/powerpc: rfi_flush: disable entry flush if present Greg Kroah-Hartman
2020-11-20 11:03 ` [PATCH 5.4 02/17] powerpc/64s: flush L1D on kernel entry Greg Kroah-Hartman
2020-11-20 11:03 ` [PATCH 5.4 03/17] powerpc/64s: flush L1D after user accesses Greg Kroah-Hartman
2020-11-20 11:03 ` [PATCH 5.4 04/17] powerpc: Only include kup-radix.h for 64-bit Book3S Greg Kroah-Hartman
2020-11-20 11:03 ` [PATCH 5.4 05/17] selftests/powerpc: entry flush test Greg Kroah-Hartman
2020-11-20 11:03 ` [PATCH 5.4 06/17] MIPS: PCI: Fix MIPS build Greg Kroah-Hartman
2020-11-20 11:03 ` [PATCH 5.4 07/17] net/mlx5: Use async EQ setup cleanup helpers for multiple EQs Greg Kroah-Hartman
2020-11-20 11:03 ` [PATCH 5.4 08/17] net/mlx5: poll cmd EQ in case of command timeout Greg Kroah-Hartman
2020-11-20 11:03 ` Greg Kroah-Hartman [this message]
2020-11-20 11:03 ` [PATCH 5.4 10/17] net/mlx5: Add retry mechanism to the command entry index allocation Greg Kroah-Hartman
2020-11-20 11:03 ` [PATCH 5.4 11/17] powerpc/8xx: Always fault when _PAGE_ACCESSED is not set Greg Kroah-Hartman
2020-11-20 11:03 ` [PATCH 5.4 12/17] net: lantiq: Add locking for TX DMA channel Greg Kroah-Hartman
2020-11-20 11:03 ` [PATCH 5.4 13/17] Input: sunkbd - avoid use-after-free in teardown paths Greg Kroah-Hartman
2020-11-20 11:03 ` [PATCH 5.4 14/17] mac80211: always wind down STA state Greg Kroah-Hartman
2020-11-20 11:03 ` [PATCH 5.4 15/17] can: proc: can_remove_proc(): silence remove_proc_entry warning Greg Kroah-Hartman
2020-11-20 11:03 ` [PATCH 5.4 16/17] KVM: x86: clflushopt should be treated as a no-op by emulation Greg Kroah-Hartman
2020-11-20 11:03 ` [PATCH 5.4 17/17] ACPI: GED: fix -Wformat Greg Kroah-Hartman
2020-11-20 16:56 ` [PATCH 5.4 00/17] 5.4.79-rc1 review Jon Hunter
2020-11-20 22:28 ` Shuah Khan
2020-11-21 11:48 ` Naresh Kamboju
2020-11-21 18:37 ` Guenter Roeck

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20201120104541.518263886@linuxfoundation.org \
    --to=gregkh@linuxfoundation.org \
    --cc=eranbe@mellanox.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=moshe@mellanox.com \
    --cc=saeedm@mellanox.com \
    --cc=stable@vger.kernel.org \
    --cc=timo@rothenpieler.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.