LKML Archive on lore.kernel.org
 help / color / Atom feed
From: Moshe Shemesh <moshe@mellanox.com>
To: "David S. Miller" <davem@davemloft.net>,
	Jiri Pirko <jiri@mellanox.com>,
	Vasundhara Volam <vasundhara-v.volam@broadcom.com>
Cc: netdev@vger.kernel.org, linux-kernel@vger.kernel.org,
	Moshe Shemesh <moshe@mellanox.com>
Subject: [PATCH net-next RFC 06/13] net/mlx5: Handle sync reset now event
Date: Mon, 27 Jul 2020 14:02:26 +0300
Message-ID: <1595847753-2234-7-git-send-email-moshe@mellanox.com> (raw)
In-Reply-To: <1595847753-2234-1-git-send-email-moshe@mellanox.com>

On sync_reset_now event the driver does reload and PCI link toggle to
activate firmware upgrade reset. When the firmware sends this event it
syncs the event on all PFs, so all PFs will do PCI link toggle at once.
To do PCI link toggle, the driver ensures that no other device ID under
the same bridge by checking that all the PF functions under the same PCI
bridge have same device ID. If no other device it uses PCI bridge link
control to turn link down and up.

Signed-off-by: Moshe Shemesh <moshe@mellanox.com>
---
 .../ethernet/mellanox/mlx5/core/fw_reset.c    | 126 ++++++++++++++++++
 1 file changed, 126 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c
index 13f83fc1783f..87f2b24f4aaa 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c
@@ -8,6 +8,9 @@ struct mlx5_fw_reset {
 	struct mlx5_nb nb;
 	struct workqueue_struct *wq;
 	struct work_struct reset_request_work;
+	struct work_struct reset_now_work;
+	struct completion done;
+	int ret;
 };
 
 static int mlx5_reg_mfrl_set(struct mlx5_core_dev *dev, u8 reset_level,
@@ -71,6 +74,124 @@ static void mlx5_sync_reset_request_event(struct work_struct *work)
 		mlx5_core_warn(dev, "PCI Sync FW Update Reset Ack. Device reset is expected.\n");
 }
 
+#define MLX5_PCI_LINK_UP_TIMEOUT 2000
+
+static int mlx5_pci_link_toggle(struct mlx5_core_dev *dev)
+{
+	struct pci_bus *bridge_bus = dev->pdev->bus;
+	struct pci_dev *bridge = bridge_bus->self;
+	u16 reg16, dev_id, sdev_id;
+	unsigned long timeout;
+	struct pci_dev *sdev;
+	int cap, err;
+	u32 reg32;
+
+	/* Check that all functions under the pci bridge are PFs of
+	 * this device otherwise fail this function.
+	 */
+	err = pci_read_config_word(dev->pdev, PCI_DEVICE_ID, &dev_id);
+	if (err)
+		return err;
+	list_for_each_entry(sdev, &bridge_bus->devices, bus_list) {
+		err = pci_read_config_word(sdev, PCI_DEVICE_ID, &sdev_id);
+		if (err)
+			return err;
+		if (sdev_id != dev_id)
+			return -EPERM;
+	}
+
+	cap = pci_find_capability(bridge, PCI_CAP_ID_EXP);
+	if (!cap)
+		return -EOPNOTSUPP;
+
+	list_for_each_entry(sdev, &bridge_bus->devices, bus_list) {
+		pci_save_state(sdev);
+		pci_cfg_access_lock(sdev);
+	}
+	/* PCI link toggle */
+	err = pci_read_config_word(bridge, cap + PCI_EXP_LNKCTL, &reg16);
+	if (err)
+		return err;
+	reg16 |= PCI_EXP_LNKCTL_LD;
+	err = pci_write_config_word(bridge, cap + PCI_EXP_LNKCTL, reg16);
+	if (err)
+		return err;
+	msleep(500);
+	reg16 &= ~PCI_EXP_LNKCTL_LD;
+	err = pci_write_config_word(bridge, cap + PCI_EXP_LNKCTL, reg16);
+	if (err)
+		return err;
+
+	/* Check link */
+	err = pci_read_config_dword(bridge, cap + PCI_EXP_LNKCAP, &reg32);
+	if (err)
+		return err;
+	if (!(reg32 & PCI_EXP_LNKCAP_DLLLARC)) {
+		mlx5_core_warn(dev, "No PCI link reporting capability (0x%08x)\n", reg32);
+		msleep(1000);
+		goto restore;
+	}
+
+	timeout = jiffies + msecs_to_jiffies(MLX5_PCI_LINK_UP_TIMEOUT);
+	do {
+		err = pci_read_config_word(bridge, cap + PCI_EXP_LNKSTA, &reg16);
+		if (err)
+			return err;
+		if (reg16 & PCI_EXP_LNKSTA_DLLLA)
+			break;
+		msleep(20);
+	} while (!time_after(jiffies, timeout));
+
+	if (reg16 & PCI_EXP_LNKSTA_DLLLA) {
+		mlx5_core_info(dev, "PCI Link up\n");
+	} else {
+		mlx5_core_err(dev, "PCI link not ready (0x%04x) after %d ms\n",
+			      reg16, MLX5_PCI_LINK_UP_TIMEOUT);
+		err = -ETIMEDOUT;
+	}
+
+restore:
+	list_for_each_entry(sdev, &bridge_bus->devices, bus_list) {
+		pci_cfg_access_unlock(sdev);
+		pci_restore_state(sdev);
+	}
+
+	return err;
+}
+
+static void mlx5_sync_reset_now_event(struct work_struct *work)
+{
+	struct mlx5_fw_reset *fw_reset = container_of(work, struct mlx5_fw_reset,
+						      reset_now_work);
+	struct mlx5_core_dev *dev = fw_reset->dev;
+	int err;
+
+	mlx5_health_clear_reset_requested_mode(dev);
+	mlx5_stop_health_poll(dev, true);
+
+	mlx5_core_warn(dev, "Sync Reset now. Device is going to reset.\n");
+
+	err = mlx5_cmd_fast_teardown_hca(dev);
+	if (err) {
+		mlx5_core_warn(dev, "Fast teardown failed, no reset done, err %d\n", err);
+		goto done;
+	}
+
+	err = mlx5_pci_link_toggle(dev);
+	if (err) {
+		mlx5_core_warn(dev, "mlx5_pci_link_toggle failed, no reset done, err %d\n", err);
+		goto done;
+	}
+
+	mlx5_enter_error_state(dev, true);
+	mlx5_unload_one(dev, false);
+done:
+	if (err)
+		mlx5_start_health_poll(dev);
+	fw_reset->ret = err;
+	complete(&fw_reset->done);
+}
+
 static void mlx5_sync_reset_events_handle(struct mlx5_fw_reset *fw_reset, struct mlx5_eqe *eqe)
 {
 	struct mlx5_eqe_sync_fw_update *sync_fw_update_eqe;
@@ -82,6 +203,9 @@ static void mlx5_sync_reset_events_handle(struct mlx5_fw_reset *fw_reset, struct
 	case MLX5_SYNC_RST_STATE_RESET_REQUEST:
 		queue_work(fw_reset->wq, &fw_reset->reset_request_work);
 		break;
+	case MLX5_SYNC_RST_STATE_RESET_NOW:
+		queue_work(fw_reset->wq, &fw_reset->reset_now_work);
+		break;
 	}
 }
 
@@ -117,10 +241,12 @@ int mlx5_fw_reset_events_init(struct mlx5_core_dev *dev)
 	dev->priv.fw_reset = fw_reset;
 
 	INIT_WORK(&fw_reset->reset_request_work, mlx5_sync_reset_request_event);
+	INIT_WORK(&fw_reset->reset_now_work, mlx5_sync_reset_now_event);
 
 	MLX5_NB_INIT(&fw_reset->nb, fw_reset_event_notifier, GENERAL_EVENT);
 	mlx5_eq_notifier_register(dev, &fw_reset->nb);
 
+	init_completion(&fw_reset->done);
 	return 0;
 }
 
-- 
2.17.1


  parent reply index

Thread overview: 58+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-07-27 11:02 [PATCH net-next RFC 00/13] Add devlink reload level option Moshe Shemesh
2020-07-27 11:02 ` [PATCH net-next RFC 01/13] devlink: Add reload level option to devlink reload command Moshe Shemesh
2020-07-28  0:58   ` Jakub Kicinski
2020-07-28 13:58     ` Jiri Pirko
2020-07-28 16:47       ` Jacob Keller
2020-07-28 18:44         ` Jakub Kicinski
2020-07-28 19:18           ` Jacob Keller
2020-07-28 20:06             ` Jakub Kicinski
2020-07-29 14:54               ` Moshe Shemesh
2020-07-29 21:07                 ` Jakub Kicinski
2020-07-30 12:30                   ` Moshe Shemesh
2020-07-30 23:11                     ` Jakub Kicinski
2020-08-01 21:32                       ` Moshe Shemesh
2020-08-03 14:14                         ` Jiri Pirko
2020-08-03 20:57                           ` Jakub Kicinski
2020-08-04 10:04                             ` Jiri Pirko
2020-08-04 20:39                               ` Jakub Kicinski
2020-08-05 11:02                                 ` Jiri Pirko
2020-08-06 18:25                                   ` Jakub Kicinski
2020-08-06 22:56                                     ` Jacob Keller
2020-08-09 13:21                                     ` Moshe Shemesh
2020-08-10 16:53                                       ` Jakub Kicinski
2020-08-10 17:09                                         ` Jacob Keller
2020-08-10 18:17                                           ` Jakub Kicinski
2020-08-11  5:46                                         ` Jiri Pirko
2020-07-27 11:02 ` [PATCH net-next RFC 02/13] devlink: Add reload levels data to dev get Moshe Shemesh
2020-07-28  0:58   ` Jakub Kicinski
2020-07-29 14:37     ` Moshe Shemesh
2020-07-29 21:11       ` Jakub Kicinski
2020-07-30 12:05         ` Moshe Shemesh
2020-07-27 11:02 ` [PATCH net-next RFC 03/13] net/mlx5: Add functions to set/query MFRL register Moshe Shemesh
2020-07-27 11:02 ` [PATCH net-next RFC 04/13] net/mlx5: Set cap for pci sync for fw update event Moshe Shemesh
2020-07-27 11:02 ` [PATCH net-next RFC 05/13] net/mlx5: Handle sync reset request event Moshe Shemesh
2020-07-27 11:02 ` Moshe Shemesh [this message]
2020-07-27 11:02 ` [PATCH net-next RFC 07/13] net/mlx5: Handle sync reset abort event Moshe Shemesh
2020-07-27 11:02 ` [PATCH net-next RFC 08/13] net/mlx5: Add support for devlink reload level fw reset Moshe Shemesh
2020-07-27 11:02 ` [PATCH net-next RFC 09/13] devlink: Add enable_remote_dev_reset generic parameter Moshe Shemesh
2020-07-28  0:59   ` Jakub Kicinski
2020-07-29 14:42     ` Moshe Shemesh
2020-07-29 20:57       ` Jakub Kicinski
2020-07-30 12:08         ` Moshe Shemesh
2020-07-27 11:02 ` [PATCH net-next RFC 10/13] net/mlx5: Add devlink param enable_remote_dev_reset support Moshe Shemesh
2020-07-28  0:59   ` Jakub Kicinski
2020-07-27 11:02 ` [PATCH net-next RFC 11/13] net/mlx5: Add support for fw live patch event Moshe Shemesh
2020-07-27 11:02 ` [PATCH net-next RFC 12/13] net/mlx5: Add support for devlink reload level live patch Moshe Shemesh
2020-07-27 11:02 ` [PATCH net-next RFC 13/13] devlink: Add Documentation/networking/devlink/devlink-reload.rst Moshe Shemesh
2020-07-28  5:25 ` [PATCH net-next RFC 00/13] Add devlink reload level option Vasundhara Volam
2020-07-28 16:43   ` Jacob Keller
2020-08-03 10:24     ` Vasundhara Volam
2020-08-03 12:17       ` Moshe Shemesh
2020-08-03 12:47         ` Vasundhara Volam
2020-08-03 13:52           ` Moshe Shemesh
2020-08-04 10:13             ` Vasundhara Volam
2020-08-05  6:32               ` Moshe Shemesh
2020-08-05  6:55                 ` Vasundhara Volam
2020-08-05  8:20                   ` Moshe Shemesh
2020-08-12  9:34                     ` Vasundhara Volam
2020-07-28 16:37 ` Jacob Keller

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1595847753-2234-7-git-send-email-moshe@mellanox.com \
    --to=moshe@mellanox.com \
    --cc=davem@davemloft.net \
    --cc=jiri@mellanox.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=netdev@vger.kernel.org \
    --cc=vasundhara-v.volam@broadcom.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

LKML Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/lkml/0 lkml/git/0.git
	git clone --mirror https://lore.kernel.org/lkml/1 lkml/git/1.git
	git clone --mirror https://lore.kernel.org/lkml/2 lkml/git/2.git
	git clone --mirror https://lore.kernel.org/lkml/3 lkml/git/3.git
	git clone --mirror https://lore.kernel.org/lkml/4 lkml/git/4.git
	git clone --mirror https://lore.kernel.org/lkml/5 lkml/git/5.git
	git clone --mirror https://lore.kernel.org/lkml/6 lkml/git/6.git
	git clone --mirror https://lore.kernel.org/lkml/7 lkml/git/7.git
	git clone --mirror https://lore.kernel.org/lkml/8 lkml/git/8.git
	git clone --mirror https://lore.kernel.org/lkml/9 lkml/git/9.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 lkml lkml/ https://lore.kernel.org/lkml \
		linux-kernel@vger.kernel.org
	public-inbox-index lkml

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.linux-kernel


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git