linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Sasha Levin <sashal@kernel.org>
To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
Cc: Eran Ben Elisha <eranbe@mellanox.com>,
	Moshe Shemesh <moshe@mellanox.com>,
	Saeed Mahameed <saeedm@mellanox.com>,
	Sasha Levin <sashal@kernel.org>,
	netdev@vger.kernel.org, linux-rdma@vger.kernel.org
Subject: [PATCH AUTOSEL 5.6 38/47] net/mlx5: Fix a race when moving command interface to events mode
Date: Thu, 28 May 2020 07:55:51 -0400	[thread overview]
Message-ID: <20200528115600.1405808-38-sashal@kernel.org> (raw)
In-Reply-To: <20200528115600.1405808-1-sashal@kernel.org>

From: Eran Ben Elisha <eranbe@mellanox.com>

[ Upstream commit d43b7007dbd1195a5b6b83213e49b1516aaf6f5e ]

After the driver creates (via FW command) an EQ for commands, the driver will
be informed of new command completions by EQE. However, due to a race in the
driver's internal command mode metadata update, some new commands will
still be mishandled by the driver as if we were in polling mode. Such commands
can get two non-forced completions, leading to access of an already freed
command entry.

The CREATE_EQ command, which maps the EQ to the command queue, must be posted
to the command queue while it is empty, and no other command should be posted.

Add a SW mechanism so that once the CREATE_EQ command is about to be executed,
all other commands return an error without being sent to the FW. Allow
sending other commands only after successfully changing the driver's
internal command mode metadata.
We can safely return an error to all other commands while creating the command
EQ, as all other commands might be sent from the user/application during
driver load. The application can rerun them later, after the driver's load has
finished.

Fixes: e126ba97dba9 ("mlx5: Add driver for Mellanox Connect-IB adapters")
Signed-off-by: Eran Ben Elisha <eranbe@mellanox.com>
Signed-off-by: Moshe Shemesh <moshe@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 35 ++++++++++++++++---
 drivers/net/ethernet/mellanox/mlx5/core/eq.c  |  3 ++
 include/linux/mlx5/driver.h                   |  6 ++++
 3 files changed, 40 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
index d695b75bc0af..2f3cafdc3b1f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -848,6 +848,14 @@ static void free_msg(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *msg);
 static void mlx5_free_cmd_msg(struct mlx5_core_dev *dev,
 			      struct mlx5_cmd_msg *msg);
 
+static bool opcode_allowed(struct mlx5_cmd *cmd, u16 opcode)
+{
+	if (cmd->allowed_opcode == CMD_ALLOWED_OPCODE_ALL)
+		return true;
+
+	return cmd->allowed_opcode == opcode;
+}
+
 static void cmd_work_handler(struct work_struct *work)
 {
 	struct mlx5_cmd_work_ent *ent = container_of(work, struct mlx5_cmd_work_ent, work);
@@ -914,7 +922,8 @@ static void cmd_work_handler(struct work_struct *work)
 
 	/* Skip sending command to fw if internal error */
 	if (pci_channel_offline(dev->pdev) ||
-	    dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
+	    dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR ||
+	    !opcode_allowed(&dev->cmd, ent->op)) {
 		u8 status = 0;
 		u32 drv_synd;
 
@@ -1405,6 +1414,22 @@ static void create_debugfs_files(struct mlx5_core_dev *dev)
 	mlx5_cmdif_debugfs_init(dev);
 }
 
+void mlx5_cmd_allowed_opcode(struct mlx5_core_dev *dev, u16 opcode)
+{
+	struct mlx5_cmd *cmd = &dev->cmd;
+	int i;
+
+	for (i = 0; i < cmd->max_reg_cmds; i++)
+		down(&cmd->sem);
+	down(&cmd->pages_sem);
+
+	cmd->allowed_opcode = opcode;
+
+	up(&cmd->pages_sem);
+	for (i = 0; i < cmd->max_reg_cmds; i++)
+		up(&cmd->sem);
+}
+
 static void mlx5_cmd_change_mod(struct mlx5_core_dev *dev, int mode)
 {
 	struct mlx5_cmd *cmd = &dev->cmd;
@@ -1681,12 +1706,13 @@ static int cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out,
 	int err;
 	u8 status = 0;
 	u32 drv_synd;
+	u16 opcode;
 	u8 token;
 
+	opcode = MLX5_GET(mbox_in, in, opcode);
 	if (pci_channel_offline(dev->pdev) ||
-	    dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
-		u16 opcode = MLX5_GET(mbox_in, in, opcode);
-
+	    dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR ||
+	    !opcode_allowed(&dev->cmd, opcode)) {
 		err = mlx5_internal_err_ret_value(dev, opcode, &drv_synd, &status);
 		MLX5_SET(mbox_out, out, status, status);
 		MLX5_SET(mbox_out, out, syndrome, drv_synd);
@@ -1988,6 +2014,7 @@ int mlx5_cmd_init(struct mlx5_core_dev *dev)
 	mlx5_core_dbg(dev, "descriptor at dma 0x%llx\n", (unsigned long long)(cmd->dma));
 
 	cmd->mode = CMD_MODE_POLLING;
+	cmd->allowed_opcode = CMD_ALLOWED_OPCODE_ALL;
 
 	create_msg_cache(dev);
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
index cccea3a8eddd..ce6c621af043 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -611,11 +611,13 @@ static int create_async_eqs(struct mlx5_core_dev *dev)
 		.nent = MLX5_NUM_CMD_EQE,
 		.mask[0] = 1ull << MLX5_EVENT_TYPE_CMD,
 	};
+	mlx5_cmd_allowed_opcode(dev, MLX5_CMD_OP_CREATE_EQ);
 	err = setup_async_eq(dev, &table->cmd_eq, &param, "cmd");
 	if (err)
 		goto err1;
 
 	mlx5_cmd_use_events(dev);
+	mlx5_cmd_allowed_opcode(dev, CMD_ALLOWED_OPCODE_ALL);
 
 	param = (struct mlx5_eq_param) {
 		.irq_index = 0,
@@ -645,6 +647,7 @@ err2:
 	mlx5_cmd_use_polling(dev);
 	cleanup_async_eq(dev, &table->cmd_eq, "cmd");
 err1:
+	mlx5_cmd_allowed_opcode(dev, CMD_ALLOWED_OPCODE_ALL);
 	mlx5_eq_notifier_unregister(dev, &table->cq_err_nb);
 	return err;
 }
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index b596353a3a12..6050264ebde1 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -301,6 +301,7 @@ struct mlx5_cmd {
 	struct semaphore sem;
 	struct semaphore pages_sem;
 	int	mode;
+	u16     allowed_opcode;
 	struct mlx5_cmd_work_ent *ent_arr[MLX5_MAX_COMMANDS];
 	struct dma_pool *pool;
 	struct mlx5_cmd_debug dbg;
@@ -893,10 +894,15 @@ mlx5_frag_buf_get_idx_last_contig_stride(struct mlx5_frag_buf_ctrl *fbc, u32 ix)
 	return min_t(u32, last_frag_stride_idx - fbc->strides_offset, fbc->sz_m1);
 }
 
+enum {
+	CMD_ALLOWED_OPCODE_ALL,
+};
+
 int mlx5_cmd_init(struct mlx5_core_dev *dev);
 void mlx5_cmd_cleanup(struct mlx5_core_dev *dev);
 void mlx5_cmd_use_events(struct mlx5_core_dev *dev);
 void mlx5_cmd_use_polling(struct mlx5_core_dev *dev);
+void mlx5_cmd_allowed_opcode(struct mlx5_core_dev *dev, u16 opcode);
 
 struct mlx5_async_ctx {
 	struct mlx5_core_dev *dev;
-- 
2.25.1


  parent reply	other threads:[~2020-05-28 11:57 UTC|newest]

Thread overview: 47+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-05-28 11:55 [PATCH AUTOSEL 5.6 01/47] ARC: Fix ICCM & DCCM runtime size checks Sasha Levin
2020-05-28 11:55 ` [PATCH AUTOSEL 5.6 02/47] ARC: [plat-eznps]: Restrict to CONFIG_ISA_ARCOMPACT Sasha Levin
2020-05-28 11:55 ` [PATCH AUTOSEL 5.6 03/47] efi/libstub: Avoid returning uninitialized data from setup_graphics() Sasha Levin
2020-05-28 11:55 ` [PATCH AUTOSEL 5.6 04/47] evm: Fix RCU list related warnings Sasha Levin
2020-05-28 11:55 ` [PATCH AUTOSEL 5.6 05/47] scsi: pm: Balance pm_only counter of request queue during system resume Sasha Levin
2020-05-28 11:55 ` [PATCH AUTOSEL 5.6 06/47] efi/earlycon: Fix early printk for wider fonts Sasha Levin
2020-05-28 11:55 ` [PATCH AUTOSEL 5.6 07/47] x86/hyperv: Properly suspend/resume reenlightenment notifications Sasha Levin
2020-05-28 11:55 ` [PATCH AUTOSEL 5.6 08/47] dmaengine: ti: k3-udma: Fix TR mode flags for slave_sg and memcpy Sasha Levin
2020-05-28 11:55 ` [PATCH AUTOSEL 5.6 09/47] i2c: altera: Fix race between xfer_msg and isr thread Sasha Levin
2020-05-28 11:55 ` [PATCH AUTOSEL 5.6 10/47] io_uring: initialize ctx->sqo_wait earlier Sasha Levin
2020-05-28 11:55 ` [PATCH AUTOSEL 5.6 11/47] io_uring: don't prepare DRAIN reqs twice Sasha Levin
2020-05-28 11:55 ` [PATCH AUTOSEL 5.6 12/47] io_uring: fix FORCE_ASYNC req preparation Sasha Levin
2020-05-28 11:55 ` [PATCH AUTOSEL 5.6 13/47] net: phy: propagate an error back to the callers of phy_sfp_probe Sasha Levin
2020-05-28 11:55 ` [PATCH AUTOSEL 5.6 14/47] net sched: fix reporting the first-time use timestamp Sasha Levin
2020-05-28 11:55 ` [PATCH AUTOSEL 5.6 15/47] x86/mmiotrace: Use cpumask_available() for cpumask_var_t variables Sasha Levin
2020-05-28 11:55 ` [PATCH AUTOSEL 5.6 16/47] net: bmac: Fix read of MAC address from ROM Sasha Levin
2020-05-28 11:55 ` [PATCH AUTOSEL 5.6 17/47] r8152: support additional Microsoft Surface Ethernet Adapter variant Sasha Levin
2020-05-28 11:55 ` [PATCH AUTOSEL 5.6 18/47] drm/edid: Add Oculus Rift S to non-desktop list Sasha Levin
2020-05-28 11:55 ` [PATCH AUTOSEL 5.6 19/47] s390/mm: fix set_huge_pte_at() for empty ptes Sasha Levin
2020-05-28 11:55 ` [PATCH AUTOSEL 5.6 20/47] io_uring: reset -EBUSY error when io sq thread is waken up Sasha Levin
2020-05-28 11:55 ` [PATCH AUTOSEL 5.6 21/47] drm/amd/display: DP training to set properly SCRAMBLING_DISABLE Sasha Levin
2020-05-28 11:55 ` [PATCH AUTOSEL 5.6 22/47] riscv: Fix print_vm_layout build error if NOMMU Sasha Levin
2020-05-28 11:55 ` [PATCH AUTOSEL 5.6 23/47] wireguard: selftests: use newer iproute2 for gcc-10 Sasha Levin
2020-05-28 11:55 ` [PATCH AUTOSEL 5.6 24/47] wireguard: queueing: preserve flow hash across packet scrubbing Sasha Levin
2020-05-28 11:55 ` [PATCH AUTOSEL 5.6 25/47] null_blk: return error for invalid zone size Sasha Levin
2020-05-28 11:55 ` [PATCH AUTOSEL 5.6 26/47] net: ethernet: ti: fix some return value check of cpsw_ale_create() Sasha Levin
2020-05-28 11:55 ` [PATCH AUTOSEL 5.6 27/47] net: sgi: ioc3-eth: Fix return value check in ioc3eth_probe() Sasha Levin
2020-05-28 11:55 ` [PATCH AUTOSEL 5.6 28/47] felix: Fix initialization of ioremap resources Sasha Levin
2020-05-28 11:55 ` [PATCH AUTOSEL 5.6 29/47] net: mvpp2: fix RX hashing for non-10G ports Sasha Levin
2020-05-28 11:55 ` [PATCH AUTOSEL 5.6 30/47] net/ethernet/freescale: rework quiesce/activate for ucc_geth Sasha Levin
2020-05-28 11:55 ` [PATCH AUTOSEL 5.6 31/47] net: ethernet: stmmac: Enable interface clocks on probe for IPQ806x Sasha Levin
2020-05-28 11:55 ` [PATCH AUTOSEL 5.6 32/47] mlxsw: spectrum: Fix use-after-free of split/unsplit/type_set in case reload fails Sasha Levin
2020-05-28 11:55 ` [PATCH AUTOSEL 5.6 33/47] selftests: mlxsw: qos_mc_aware: Specify arping timeout as an integer Sasha Levin
2020-05-28 11:55 ` [PATCH AUTOSEL 5.6 34/47] r8169: fix OCP access on RTL8117 Sasha Levin
2020-05-28 11:55 ` [PATCH AUTOSEL 5.6 35/47] net: mscc: ocelot: fix address ageing time (again) Sasha Levin
2020-05-28 11:55 ` [PATCH AUTOSEL 5.6 36/47] net: sun: fix missing release regions in cas_init_one() Sasha Levin
2020-05-28 11:55 ` [PATCH AUTOSEL 5.6 37/47] net/mlx5: Add command entry handling completion Sasha Levin
2020-05-28 11:55 ` Sasha Levin [this message]
2020-05-28 11:55 ` [PATCH AUTOSEL 5.6 39/47] net/mlx5e: Fix inner tirs handling Sasha Levin
2020-05-28 11:55 ` [PATCH AUTOSEL 5.6 40/47] net/mlx5: Fix memory leak in mlx5_events_init Sasha Levin
2020-05-28 11:55 ` [PATCH AUTOSEL 5.6 41/47] net/mlx5: Fix cleaning unmanaged flow tables Sasha Levin
2020-05-28 11:55 ` [PATCH AUTOSEL 5.6 42/47] net/mlx5e: Update netdev txq on completions during closure Sasha Levin
2020-05-28 11:55 ` [PATCH AUTOSEL 5.6 43/47] net/mlx5: Fix error flow in case of function_setup failure Sasha Levin
2020-05-28 11:55 ` [PATCH AUTOSEL 5.6 44/47] net: Fix return value about devm_platform_ioremap_resource() Sasha Levin
2020-05-28 11:55 ` [PATCH AUTOSEL 5.6 45/47] net: ethernet: ti: cpsw: fix ASSERT_RTNL() warning during suspend Sasha Levin
2020-05-28 11:55 ` [PATCH AUTOSEL 5.6 46/47] net/mlx4_core: fix a memory leak bug Sasha Levin
2020-05-28 11:56 ` [PATCH AUTOSEL 5.6 47/47] net: smsc911x: Fix runtime PM imbalance on error Sasha Levin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200528115600.1405808-38-sashal@kernel.org \
    --to=sashal@kernel.org \
    --cc=eranbe@mellanox.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-rdma@vger.kernel.org \
    --cc=moshe@mellanox.com \
    --cc=netdev@vger.kernel.org \
    --cc=saeedm@mellanox.com \
    --cc=stable@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).