All of lore.kernel.org
 help / color / mirror / Atom feed
From: keith.busch@intel.com (Keith Busch)
Subject: [PATCHv2 6/6] nvme-pci: Use host managed power state for suspend
Date: Wed, 15 May 2019 10:36:25 -0600	[thread overview]
Message-ID: <20190515163625.21776-6-keith.busch@intel.com> (raw)
In-Reply-To: <20190515163625.21776-1-keith.busch@intel.com>

The nvme pci driver prepares its devices for power loss during suspend
by shutting down the controllers. The power setting is deferred to the
pci driver's power management before the platform removes power. The
suspend-to-idle mode, however, does not remove power.

NVMe devices that implement host managed power settings can achieve
lower power and better transition latencies than generic PCI power
settings allow. Try to use this feature if the platform is not involved
with the suspend. If successful, restore the previous power state on resume.

Cc: Mario Limonciello <Mario.Limonciello at dell.com>
Cc: Kai Heng Feng <kai.heng.feng at canonical.com>
Signed-off-by: Keith Busch <keith.busch at intel.com>
---
v1 -> v2:

  Split prep patches for the get features

  Ensure queued and dispatched IO completes before attempting to set the
  low power state. This also required a sync to ensure that nothing timed
  out or reset the controller while we attempted the temporary queue freeze.

  Disable HMB if enabled. It is not clear why this should be necessary,
  but empirical reports indicate that it is.

 drivers/nvme/host/pci.c | 80 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 80 insertions(+)

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 599065ed6a32..42d5c6369803 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -18,6 +18,7 @@
 #include <linux/mutex.h>
 #include <linux/once.h>
 #include <linux/pci.h>
+#include <linux/suspend.h>
 #include <linux/t10-pi.h>
 #include <linux/types.h>
 #include <linux/io-64-nonatomic-lo-hi.h>
@@ -116,6 +117,7 @@ struct nvme_dev {
 	u32 cmbsz;
 	u32 cmbloc;
 	struct nvme_ctrl ctrl;
+	u32 last_ps;
 
 	mempool_t *iod_mempool;
 
@@ -2829,11 +2831,87 @@ static void nvme_remove(struct pci_dev *pdev)
 }
 
 #ifdef CONFIG_PM_SLEEP
+static int nvme_deep_state(struct nvme_dev *dev)
+{
+	struct pci_dev *pdev = to_pci_dev(dev->dev);
+	struct nvme_ctrl *ctrl = &dev->ctrl;
+	int ret = -EBUSY;
+
+	nvme_start_freeze(ctrl);
+	nvme_wait_freeze(ctrl);
+	nvme_sync_queues(ctrl);
+
+	if (ctrl->state != NVME_CTRL_LIVE &&
+	    ctrl->state != NVME_CTRL_ADMIN_ONLY)
+		goto unfreeze;
+
+	if (dev->host_mem_descs) {
+		ret = nvme_set_host_mem(dev, 0);
+		if (ret < 0)
+			goto unfreeze;
+	}
+
+	dev->last_ps = 0;
+	ret = nvme_get_features(ctrl, NVME_FEAT_POWER_MGMT, 0, NULL, 0,
+				&dev->last_ps);
+	if (ret < 0)
+		goto unfreeze;
+
+	ret = nvme_set_features(ctrl, NVME_FEAT_POWER_MGMT, dev->ctrl.npss,
+				NULL, 0, NULL);
+	if (ret < 0)
+		goto unfreeze;
+
+	if (ret) {
+		/*
+		 * Clearing npss forces a controller reset on resume. The
+		 * correct value will be rediscovered then.
+		 */
+		ctrl->npss = 0;
+		nvme_dev_disable(dev, true);
+		ret = 0;
+		goto unfreeze;
+	}
+
+	/*
+	 * A saved state prevents pci pm from generically controlling the
+	 * device's power. We're using protocol specific settings so we don't
+	 * want pci interfering.
+	 */
+	pci_save_state(pdev);
+unfreeze:
+	nvme_unfreeze(ctrl);
+	return ret;
+}
+
+static int nvme_make_operational(struct nvme_dev *dev)
+{
+	struct nvme_ctrl *ctrl = &dev->ctrl;
+	int ret;
+
+	ret = nvme_set_features(ctrl, NVME_FEAT_POWER_MGMT, dev->last_ps,
+				NULL, 0, NULL);
+	if (ret)
+		goto reset;
+
+	if (dev->host_mem_descs) {
+		ret = nvme_setup_host_mem(dev);
+		if (ret)
+			goto reset;
+	}
+	return 0;
+reset:
+	nvme_reset_ctrl(ctrl);
+	return 0;
+}
+
 static int nvme_suspend(struct device *dev)
 {
 	struct pci_dev *pdev = to_pci_dev(dev);
 	struct nvme_dev *ndev = pci_get_drvdata(pdev);
 
+	if (!pm_suspend_via_firmware() && ndev->ctrl.npss)
+		return nvme_deep_state(ndev);
 	nvme_dev_disable(ndev, true);
 	return 0;
 }
@@ -2843,6 +2921,8 @@ static int nvme_resume(struct device *dev)
 	struct pci_dev *pdev = to_pci_dev(dev);
 	struct nvme_dev *ndev = pci_get_drvdata(pdev);
 
+	if (!pm_resume_via_firmware() && ndev->ctrl.npss)
+		return nvme_make_operational(ndev);
 	nvme_reset_ctrl(&ndev->ctrl);
 	return 0;
 }
-- 
2.14.4

  parent reply	other threads:[~2019-05-15 16:36 UTC|newest]

Thread overview: 51+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-05-15 16:36 [PATCH 1/6] nvme-pci: Fix controller freeze wait disabling Keith Busch
2019-05-15 16:36 ` [PATCH 2/6] nvme-pci: Don't disable on timeout in reset state Keith Busch
2019-05-16  3:07   ` Ming Lei
2019-05-16 14:33     ` Keith Busch
2019-05-16  6:27   ` Christoph Hellwig
2019-05-15 16:36 ` [PATCH 3/6] nvme-pci: Unblock reset_work on IO failure Keith Busch
2019-05-16  3:13   ` Ming Lei
2019-05-16 14:14     ` Keith Busch
2019-05-17  2:31       ` Ming Lei
2019-05-16  6:28   ` Christoph Hellwig
2019-05-15 16:36 ` [PATCH 4/6] nvme-pci: Sync queues on reset Keith Busch
2019-05-16  3:34   ` Ming Lei
2019-05-16  6:29   ` Christoph Hellwig
2019-05-16 14:08     ` Keith Busch
2019-05-16 13:43   ` Minwoo Im
2019-05-15 16:36 ` [PATCH 5/6] nvme: Export get and set features Keith Busch
2019-05-16  6:26   ` Christoph Hellwig
2019-05-16 13:47   ` Minwoo Im
2019-05-15 16:36 ` Keith Busch [this message]
2019-05-15 19:33   ` [PATCHv2 6/6] nvme-pci: Use host managed power state for suspend Mario.Limonciello
2019-05-15 19:34     ` Keith Busch
2019-05-15 19:43       ` Mario.Limonciello
2019-05-16  6:25   ` Christoph Hellwig
2019-05-16 14:24     ` Keith Busch
2019-05-17  9:08       ` Christoph Hellwig
2019-05-16  9:29   ` Rafael J. Wysocki
2019-05-16 14:26     ` Keith Busch
2019-05-16 18:27       ` Kai-Heng Feng
2019-05-16 18:33         ` Mario.Limonciello
2019-05-16 19:38           ` Keith Busch
2019-05-16 20:25             ` Rafael J. Wysocki
2019-05-16 20:39               ` Keith Busch
2019-05-16 20:56                 ` Rafael J. Wysocki
2019-05-17  8:39                   ` Rafael J. Wysocki
2019-05-17  9:05                     ` Christoph Hellwig
2019-05-17  9:17                       ` Rafael J. Wysocki
2019-05-17  9:35                         ` Christoph Hellwig
2019-05-17 10:34                           ` Rafael J. Wysocki
2019-05-22  6:47                             ` Kai Heng Feng
2019-05-22 15:52                               ` Christoph Hellwig
2019-05-22 16:02                                 ` Keith Busch
2019-05-22 16:35                                   ` Mario.Limonciello
2019-05-22 16:56                                     ` Keith Busch
2019-05-22 23:08                                     ` Keith Busch
2019-05-23 15:27                                       ` Keith Busch
2019-05-17  9:22                     ` Kai-Heng Feng
2019-05-17  9:32                       ` Rafael J. Wysocki
2019-05-16 20:24         ` Rafael J. Wysocki
2019-05-16  2:43 ` [PATCH 1/6] nvme-pci: Fix controller freeze wait disabling Ming Lei
2019-05-17 18:40   ` Keith Busch
2019-05-16  6:27 ` Christoph Hellwig

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190515163625.21776-6-keith.busch@intel.com \
    --to=keith.busch@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.