All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCHv3 1/2] nvme: Export get and set features
@ 2019-05-23 15:27 Keith Busch
  2019-05-23 15:27 ` [PATCHv3 2/2] nvme-pci: Use host managed power state for suspend Keith Busch
  2019-05-23 15:36 ` [PATCHv3 1/2] nvme: Export get and set features Christoph Hellwig
  0 siblings, 2 replies; 8+ messages in thread
From: Keith Busch @ 2019-05-23 15:27 UTC (permalink / raw)


Future use intends to make use of both, so export these functions. And
since their implementation is identical except for the opcode, provide a
new function that implement both.

Signed-off-by: Keith Busch <keith.busch at intel.com>
---
Change since v2: Use 'unsigned int' instead of just 'unsiged'

 drivers/nvme/host/core.c | 22 +++++++++++++++++++---
 drivers/nvme/host/nvme.h |  4 ++++
 2 files changed, 23 insertions(+), 3 deletions(-)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index f6879e417386..bf489800bf89 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -1113,15 +1113,15 @@ static struct nvme_id_ns *nvme_identify_ns(struct nvme_ctrl *ctrl,
 	return id;
 }
 
-static int nvme_set_features(struct nvme_ctrl *dev, unsigned fid, unsigned dword11,
-		      void *buffer, size_t buflen, u32 *result)
+static int nvme_features(struct nvme_ctrl *dev, u8 op, unsigned int fid,
+		unsigned int dword11, void *buffer, size_t buflen, u32 *result)
 {
 	struct nvme_command c;
 	union nvme_result res;
 	int ret;
 
 	memset(&c, 0, sizeof(c));
-	c.features.opcode = nvme_admin_set_features;
+	c.features.opcode = op;
 	c.features.fid = cpu_to_le32(fid);
 	c.features.dword11 = cpu_to_le32(dword11);
 
@@ -1132,6 +1132,22 @@ static int nvme_set_features(struct nvme_ctrl *dev, unsigned fid, unsigned dword
 	return ret;
 }
 
+int nvme_set_features(struct nvme_ctrl *dev, unsigned int fid, unsigned int dword11,
+		      void *buffer, size_t buflen, u32 *result)
+{
+	return nvme_features(dev, nvme_admin_set_features, fid, dword11, buffer,
+			     buflen, result);
+}
+EXPORT_SYMBOL_GPL(nvme_set_features);
+
+int nvme_get_features(struct nvme_ctrl *dev, unsigned int fid, unsigned int dword11,
+		      void *buffer, size_t buflen, u32 *result)
+{
+	return nvme_features(dev, nvme_admin_get_features, fid, dword11, buffer,
+			     buflen, result);
+}
+EXPORT_SYMBOL_GPL(nvme_get_features);
+
 int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count)
 {
 	u32 q_count = (*count - 1) | ((*count - 1) << 16);
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 55553d293a98..c7f6ce548b66 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -459,6 +459,10 @@ int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
 		union nvme_result *result, void *buffer, unsigned bufflen,
 		unsigned timeout, int qid, int at_head,
 		blk_mq_req_flags_t flags, bool poll);
+int nvme_set_features(struct nvme_ctrl *dev, unsigned int fid, unsigned int dword11,
+		      void *buffer, size_t buflen, u32 *result);
+int nvme_get_features(struct nvme_ctrl *dev, unsigned int fid, unsigned int dword11,
+		      void *buffer, size_t buflen, u32 *result);
 int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count);
 void nvme_stop_keep_alive(struct nvme_ctrl *ctrl);
 int nvme_reset_ctrl(struct nvme_ctrl *ctrl);
-- 
2.14.4

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCHv3 2/2] nvme-pci: Use host managed power state for suspend
  2019-05-23 15:27 [PATCHv3 1/2] nvme: Export get and set features Keith Busch
@ 2019-05-23 15:27 ` Keith Busch
  2019-05-24  5:42   ` Kai-Heng Feng
                     ` (2 more replies)
  2019-05-23 15:36 ` [PATCHv3 1/2] nvme: Export get and set features Christoph Hellwig
  1 sibling, 3 replies; 8+ messages in thread
From: Keith Busch @ 2019-05-23 15:27 UTC (permalink / raw)


The nvme pci driver prepares its devices for power loss during suspend
by shutting down the controllers. The power setting is deferred to
pci driver's power management before the platform removes power. The
suspend-to-idle mode, however, does not remove power.

NVMe devices that implement host managed power settings can achieve
lower power and better transition latencies than using generic PCI power
settings. Try to use this feature if the platform is not involved with
the suspend. If successful, restore the previous power state on resume.

Cc: Mario Limonciello <Mario.Limonciello at dell.com>
Cc: Kai Heng Feng <kai.heng.feng at canonical.com>
Signed-off-by: Keith Busch <keith.busch at intel.com>
---
v2 -> v3:

  Removed the HMB handling. We've determined this is not be
  necessary in a s2i state.

  Splitting PM ops (Rafael)

  Incorporated improvements from Christoph

 drivers/nvme/host/pci.c | 96 +++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 93 insertions(+), 3 deletions(-)

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 599065ed6a32..47da55abb56b 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -18,6 +18,7 @@
 #include <linux/mutex.h>
 #include <linux/once.h>
 #include <linux/pci.h>
+#include <linux/suspend.h>
 #include <linux/t10-pi.h>
 #include <linux/types.h>
 #include <linux/io-64-nonatomic-lo-hi.h>
@@ -116,6 +117,7 @@ struct nvme_dev {
 	u32 cmbsz;
 	u32 cmbloc;
 	struct nvme_ctrl ctrl;
+	u32 last_ps;
 
 	mempool_t *iod_mempool;
 
@@ -2829,16 +2831,94 @@ static void nvme_remove(struct pci_dev *pdev)
 }
 
 #ifdef CONFIG_PM_SLEEP
+static int nvme_get_power_state(struct nvme_ctrl *ctrl, u32 *ps)
+{
+	return nvme_get_features(ctrl, NVME_FEAT_POWER_MGMT, 0, NULL, 0, ps);
+}
+
+static int nvme_set_power_state(struct nvme_ctrl *ctrl, u32 ps)
+{
+	return nvme_set_features(ctrl, NVME_FEAT_POWER_MGMT, ps, NULL, 0, NULL);
+}
+
+static int nvme_resume(struct device *dev)
+{
+	struct nvme_dev *ndev = pci_get_drvdata(to_pci_dev(dev));
+	struct nvme_ctrl *ctrl = &ndev->ctrl;
+
+	if (pm_resume_via_firmware() || !ctrl->npss ||
+	    nvme_set_power_state(ctrl, ndev->last_ps) != 0)
+		nvme_reset_ctrl(ctrl);
+	return 0;
+}
+
 static int nvme_suspend(struct device *dev)
 {
 	struct pci_dev *pdev = to_pci_dev(dev);
 	struct nvme_dev *ndev = pci_get_drvdata(pdev);
+	struct nvme_ctrl *ctrl = &ndev->ctrl;
+	int ret = -EBUSY;
+
+	/*
+	 * The platform does not remove power for a kernel managed suspend so
+	 * use host managed nvme power settings for lowest idle power if
+	 * possible. This should have quicker resume latency than a full device
+	 * shutdown.  But if the firmware is involved after the suspend or the
+	 * device does not support any non-default power states, shut down the
+	 * device fully.
+	 */
+	if (pm_suspend_via_firmware() || !ctrl->npss) {
+		nvme_dev_disable(ndev, true);
+		return 0;
+	}
+
+	nvme_start_freeze(ctrl);
+	nvme_wait_freeze(ctrl);
+	nvme_sync_queues(ctrl);
+
+	if (ctrl->state != NVME_CTRL_LIVE &&
+	    ctrl->state != NVME_CTRL_ADMIN_ONLY)
+		goto unfreeze;
+
+	ndev->last_ps = 0;
+	ret = nvme_get_power_state(ctrl, &ndev->last_ps);
+	if (ret < 0)
+		goto unfreeze;
+
+	ret = nvme_set_power_state(ctrl, ctrl->npss);
+	if (ret < 0)
+		goto unfreeze;
+
+	if (ret) {
+		/*
+		 * Clearing npss forces a controller reset on resume. The
+		 * correct value will be resdicovered then.
+		 */
+		nvme_dev_disable(ndev, true);
+		ctrl->npss = 0;
+		ret = 0;
+		goto unfreeze;
+	}
+	/*
+	 * A saved state prevents pci pm from generically controlling the
+	 * device's power. If we're using protocol specific settings, we don't
+	 * want pci interfering.
+	 */
+	pci_save_state(pdev);
+unfreeze:
+	nvme_unfreeze(ctrl);
+	return ret;
+}
+
+static int nvme_simple_suspend(struct device *dev)
+{
+	struct nvme_dev *ndev = pci_get_drvdata(to_pci_dev(dev));
 
 	nvme_dev_disable(ndev, true);
 	return 0;
 }
 
-static int nvme_resume(struct device *dev)
+static int nvme_simple_resume(struct device *dev)
 {
 	struct pci_dev *pdev = to_pci_dev(dev);
 	struct nvme_dev *ndev = pci_get_drvdata(pdev);
@@ -2846,9 +2926,19 @@ static int nvme_resume(struct device *dev)
 	nvme_reset_ctrl(&ndev->ctrl);
 	return 0;
 }
-#endif
 
-static SIMPLE_DEV_PM_OPS(nvme_dev_pm_ops, nvme_suspend, nvme_resume);
+const struct dev_pm_ops nvme_dev_pm_ops = {
+	.suspend	= nvme_suspend,
+	.resume		= nvme_resume,
+	.freeze		= nvme_simple_suspend,
+	.thaw		= nvme_simple_resume,
+	.poweroff	= nvme_simple_suspend,
+	.restore	= nvme_simple_resume,
+};
+
+#else
+#define nvme_dev_pm_ops		NULL
+#endif
 
 static pci_ers_result_t nvme_error_detected(struct pci_dev *pdev,
 						pci_channel_state_t state)
-- 
2.14.4

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCHv3 1/2] nvme: Export get and set features
  2019-05-23 15:27 [PATCHv3 1/2] nvme: Export get and set features Keith Busch
  2019-05-23 15:27 ` [PATCHv3 2/2] nvme-pci: Use host managed power state for suspend Keith Busch
@ 2019-05-23 15:36 ` Christoph Hellwig
  2019-05-23 15:42   ` Keith Busch
  1 sibling, 1 reply; 8+ messages in thread
From: Christoph Hellwig @ 2019-05-23 15:36 UTC (permalink / raw)


> Change since v2: Use 'unsigned int' instead of just 'unsiged'

Those are equivalent in C, why bother changing it?

> +int nvme_set_features(struct nvme_ctrl *dev, unsigned int fid, unsigned int dword11,

Especially as this now creates lines > 80 characters in various places.

^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCHv3 1/2] nvme: Export get and set features
  2019-05-23 15:36 ` [PATCHv3 1/2] nvme: Export get and set features Christoph Hellwig
@ 2019-05-23 15:42   ` Keith Busch
  0 siblings, 0 replies; 8+ messages in thread
From: Keith Busch @ 2019-05-23 15:42 UTC (permalink / raw)


On Thu, May 23, 2019@05:36:26PM +0200, Christoph Hellwig wrote:
> > Change since v2: Use 'unsigned int' instead of just 'unsiged'
> 
> Those are equivalent in C, why bother changing it?

Just to shut up checkpatch.pl:

  WARNING: Prefer 'unsigned int' to bare use of 'unsigned'
 
> > +int nvme_set_features(struct nvme_ctrl *dev, unsigned int fid, unsigned int dword11,
> 
> Especially as this now creates lines > 80 characters in various places.

*facepalm*

^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCHv3 2/2] nvme-pci: Use host managed power state for suspend
  2019-05-23 15:27 ` [PATCHv3 2/2] nvme-pci: Use host managed power state for suspend Keith Busch
@ 2019-05-24  5:42   ` Kai-Heng Feng
  2019-05-24 15:14     ` Mario.Limonciello
  2019-05-30  9:09   ` Rafael J. Wysocki
  2019-06-01  9:09   ` Christoph Hellwig
  2 siblings, 1 reply; 8+ messages in thread
From: Kai-Heng Feng @ 2019-05-24  5:42 UTC (permalink / raw)


at 23:27, Keith Busch <keith.busch@intel.com> wrote:

> The nvme pci driver prepares its devices for power loss during suspend
> by shutting down the controllers. The power setting is deferred to
> pci driver's power management before the platform removes power. The
> suspend-to-idle mode, however, does not remove power.
>
> NVMe devices that implement host managed power settings can achieve
> lower power and better transition latencies than using generic PCI power
> settings. Try to use this feature if the platform is not involved with
> the suspend. If successful, restore the previous power state on resume.
>
> Cc: Mario Limonciello <Mario.Limonciello at dell.com>
> Cc: Kai Heng Feng <kai.heng.feng at canonical.com>
> Signed-off-by: Keith Busch <keith.busch at intel.com>

Tested-by: Kai-Heng Feng <kai.heng.feng at canonical.com>

> ---
> v2 -> v3:
>
>   Removed the HMB handling. We've determined this is not be
>   necessary in a s2i state.
>
>   Splitting PM ops (Rafael)
>
>   Incorporated improvements from Christoph
>
>  drivers/nvme/host/pci.c | 96 +++++++++++++++++++++++++++++++++++++++++++++++--
>  1 file changed, 93 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
> index 599065ed6a32..47da55abb56b 100644
> --- a/drivers/nvme/host/pci.c
> +++ b/drivers/nvme/host/pci.c
> @@ -18,6 +18,7 @@
>  #include <linux/mutex.h>
>  #include <linux/once.h>
>  #include <linux/pci.h>
> +#include <linux/suspend.h>
>  #include <linux/t10-pi.h>
>  #include <linux/types.h>
>  #include <linux/io-64-nonatomic-lo-hi.h>
> @@ -116,6 +117,7 @@ struct nvme_dev {
>  	u32 cmbsz;
>  	u32 cmbloc;
>  	struct nvme_ctrl ctrl;
> +	u32 last_ps;
>
>  	mempool_t *iod_mempool;
>
> @@ -2829,16 +2831,94 @@ static void nvme_remove(struct pci_dev *pdev)
>  }
>
>  #ifdef CONFIG_PM_SLEEP
> +static int nvme_get_power_state(struct nvme_ctrl *ctrl, u32 *ps)
> +{
> +	return nvme_get_features(ctrl, NVME_FEAT_POWER_MGMT, 0, NULL, 0, ps);
> +}
> +
> +static int nvme_set_power_state(struct nvme_ctrl *ctrl, u32 ps)
> +{
> +	return nvme_set_features(ctrl, NVME_FEAT_POWER_MGMT, ps, NULL, 0, NULL);
> +}
> +
> +static int nvme_resume(struct device *dev)
> +{
> +	struct nvme_dev *ndev = pci_get_drvdata(to_pci_dev(dev));
> +	struct nvme_ctrl *ctrl = &ndev->ctrl;
> +
> +	if (pm_resume_via_firmware() || !ctrl->npss ||
> +	    nvme_set_power_state(ctrl, ndev->last_ps) != 0)
> +		nvme_reset_ctrl(ctrl);
> +	return 0;
> +}
> +
>  static int nvme_suspend(struct device *dev)
>  {
>  	struct pci_dev *pdev = to_pci_dev(dev);
>  	struct nvme_dev *ndev = pci_get_drvdata(pdev);
> +	struct nvme_ctrl *ctrl = &ndev->ctrl;
> +	int ret = -EBUSY;
> +
> +	/*
> +	 * The platform does not remove power for a kernel managed suspend so
> +	 * use host managed nvme power settings for lowest idle power if
> +	 * possible. This should have quicker resume latency than a full device
> +	 * shutdown.  But if the firmware is involved after the suspend or the
> +	 * device does not support any non-default power states, shut down the
> +	 * device fully.
> +	 */
> +	if (pm_suspend_via_firmware() || !ctrl->npss) {
> +		nvme_dev_disable(ndev, true);
> +		return 0;
> +	}
> +
> +	nvme_start_freeze(ctrl);
> +	nvme_wait_freeze(ctrl);
> +	nvme_sync_queues(ctrl);
> +
> +	if (ctrl->state != NVME_CTRL_LIVE &&
> +	    ctrl->state != NVME_CTRL_ADMIN_ONLY)
> +		goto unfreeze;
> +
> +	ndev->last_ps = 0;
> +	ret = nvme_get_power_state(ctrl, &ndev->last_ps);
> +	if (ret < 0)
> +		goto unfreeze;
> +
> +	ret = nvme_set_power_state(ctrl, ctrl->npss);
> +	if (ret < 0)
> +		goto unfreeze;
> +
> +	if (ret) {
> +		/*
> +		 * Clearing npss forces a controller reset on resume. The
> +		 * correct value will be resdicovered then.
> +		 */
> +		nvme_dev_disable(ndev, true);
> +		ctrl->npss = 0;
> +		ret = 0;
> +		goto unfreeze;
> +	}
> +	/*
> +	 * A saved state prevents pci pm from generically controlling the
> +	 * device's power. If we're using protocol specific settings, we don't
> +	 * want pci interfering.
> +	 */
> +	pci_save_state(pdev);
> +unfreeze:
> +	nvme_unfreeze(ctrl);
> +	return ret;
> +}
> +
> +static int nvme_simple_suspend(struct device *dev)
> +{
> +	struct nvme_dev *ndev = pci_get_drvdata(to_pci_dev(dev));
>
>  	nvme_dev_disable(ndev, true);
>  	return 0;
>  }
>
> -static int nvme_resume(struct device *dev)
> +static int nvme_simple_resume(struct device *dev)
>  {
>  	struct pci_dev *pdev = to_pci_dev(dev);
>  	struct nvme_dev *ndev = pci_get_drvdata(pdev);
> @@ -2846,9 +2926,19 @@ static int nvme_resume(struct device *dev)
>  	nvme_reset_ctrl(&ndev->ctrl);
>  	return 0;
>  }
> -#endif
>
> -static SIMPLE_DEV_PM_OPS(nvme_dev_pm_ops, nvme_suspend, nvme_resume);
> +const struct dev_pm_ops nvme_dev_pm_ops = {
> +	.suspend	= nvme_suspend,
> +	.resume		= nvme_resume,
> +	.freeze		= nvme_simple_suspend,
> +	.thaw		= nvme_simple_resume,
> +	.poweroff	= nvme_simple_suspend,
> +	.restore	= nvme_simple_resume,
> +};
> +
> +#else
> +#define nvme_dev_pm_ops		NULL
> +#endif
>
>  static pci_ers_result_t nvme_error_detected(struct pci_dev *pdev,
>  						pci_channel_state_t state)
> -- 
> 2.14.4

^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCHv3 2/2] nvme-pci: Use host managed power state for suspend
  2019-05-24  5:42   ` Kai-Heng Feng
@ 2019-05-24 15:14     ` Mario.Limonciello
  0 siblings, 0 replies; 8+ messages in thread
From: Mario.Limonciello @ 2019-05-24 15:14 UTC (permalink / raw)


> -----Original Message-----
> From: Kai-Heng Feng <kai.heng.feng at canonical.com>
> Sent: Friday, May 24, 2019 12:43 AM
> To: Keith Busch
> Cc: Christoph Hellwig; Sagi Grimberg; linux-nvme at lists.infradead.org; Rafael
> Wysocki; Limonciello, Mario
> Subject: Re: [PATCHv3 2/2] nvme-pci: Use host managed power state for suspend
> 
> 
> [EXTERNAL EMAIL]
> 
>@23:27, Keith Busch <keith.busch@intel.com> wrote:
> 
> > The nvme pci driver prepares its devices for power loss during suspend
> > by shutting down the controllers. The power setting is deferred to
> > pci driver's power management before the platform removes power. The
> > suspend-to-idle mode, however, does not remove power.
> >
> > NVMe devices that implement host managed power settings can achieve
> > lower power and better transition latencies than using generic PCI power
> > settings. Try to use this feature if the platform is not involved with
> > the suspend. If successful, restore the previous power state on resume.
> >
> > Cc: Mario Limonciello <Mario.Limonciello at dell.com>
> > Cc: Kai Heng Feng <kai.heng.feng at canonical.com>
> > Signed-off-by: Keith Busch <keith.busch at intel.com>
> 
> Tested-by: Kai-Heng Feng <kai.heng.feng at canonical.com>

Thanks, I tested with a Hynix SSD in an XPS 9380 this morning, and see good results.

Tested-by: Mario Limonciello <mario.limonciello at dell.com>

> 
> > ---
> > v2 -> v3:
> >
> >   Removed the HMB handling. We've determined this is not be
> >   necessary in a s2i state.
> >
> >   Splitting PM ops (Rafael)
> >
> >   Incorporated improvements from Christoph
> >
> >  drivers/nvme/host/pci.c | 96
> +++++++++++++++++++++++++++++++++++++++++++++++--
> >  1 file changed, 93 insertions(+), 3 deletions(-)
> >
> > diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
> > index 599065ed6a32..47da55abb56b 100644
> > --- a/drivers/nvme/host/pci.c
> > +++ b/drivers/nvme/host/pci.c
> > @@ -18,6 +18,7 @@
> >  #include <linux/mutex.h>
> >  #include <linux/once.h>
> >  #include <linux/pci.h>
> > +#include <linux/suspend.h>
> >  #include <linux/t10-pi.h>
> >  #include <linux/types.h>
> >  #include <linux/io-64-nonatomic-lo-hi.h>
> > @@ -116,6 +117,7 @@ struct nvme_dev {
> >  	u32 cmbsz;
> >  	u32 cmbloc;
> >  	struct nvme_ctrl ctrl;
> > +	u32 last_ps;
> >
> >  	mempool_t *iod_mempool;
> >
> > @@ -2829,16 +2831,94 @@ static void nvme_remove(struct pci_dev *pdev)
> >  }
> >
> >  #ifdef CONFIG_PM_SLEEP
> > +static int nvme_get_power_state(struct nvme_ctrl *ctrl, u32 *ps)
> > +{
> > +	return nvme_get_features(ctrl, NVME_FEAT_POWER_MGMT, 0, NULL, 0,
> ps);
> > +}
> > +
> > +static int nvme_set_power_state(struct nvme_ctrl *ctrl, u32 ps)
> > +{
> > +	return nvme_set_features(ctrl, NVME_FEAT_POWER_MGMT, ps, NULL, 0,
> NULL);
> > +}
> > +
> > +static int nvme_resume(struct device *dev)
> > +{
> > +	struct nvme_dev *ndev = pci_get_drvdata(to_pci_dev(dev));
> > +	struct nvme_ctrl *ctrl = &ndev->ctrl;
> > +
> > +	if (pm_resume_via_firmware() || !ctrl->npss ||
> > +	    nvme_set_power_state(ctrl, ndev->last_ps) != 0)
> > +		nvme_reset_ctrl(ctrl);
> > +	return 0;
> > +}
> > +
> >  static int nvme_suspend(struct device *dev)
> >  {
> >  	struct pci_dev *pdev = to_pci_dev(dev);
> >  	struct nvme_dev *ndev = pci_get_drvdata(pdev);
> > +	struct nvme_ctrl *ctrl = &ndev->ctrl;
> > +	int ret = -EBUSY;
> > +
> > +	/*
> > +	 * The platform does not remove power for a kernel managed suspend so
> > +	 * use host managed nvme power settings for lowest idle power if
> > +	 * possible. This should have quicker resume latency than a full device
> > +	 * shutdown.  But if the firmware is involved after the suspend or the
> > +	 * device does not support any non-default power states, shut down the
> > +	 * device fully.
> > +	 */
> > +	if (pm_suspend_via_firmware() || !ctrl->npss) {
> > +		nvme_dev_disable(ndev, true);
> > +		return 0;
> > +	}
> > +
> > +	nvme_start_freeze(ctrl);
> > +	nvme_wait_freeze(ctrl);
> > +	nvme_sync_queues(ctrl);
> > +
> > +	if (ctrl->state != NVME_CTRL_LIVE &&
> > +	    ctrl->state != NVME_CTRL_ADMIN_ONLY)
> > +		goto unfreeze;
> > +
> > +	ndev->last_ps = 0;
> > +	ret = nvme_get_power_state(ctrl, &ndev->last_ps);
> > +	if (ret < 0)
> > +		goto unfreeze;
> > +
> > +	ret = nvme_set_power_state(ctrl, ctrl->npss);
> > +	if (ret < 0)
> > +		goto unfreeze;
> > +
> > +	if (ret) {
> > +		/*
> > +		 * Clearing npss forces a controller reset on resume. The
> > +		 * correct value will be resdicovered then.
> > +		 */
> > +		nvme_dev_disable(ndev, true);
> > +		ctrl->npss = 0;
> > +		ret = 0;
> > +		goto unfreeze;
> > +	}
> > +	/*
> > +	 * A saved state prevents pci pm from generically controlling the
> > +	 * device's power. If we're using protocol specific settings, we don't
> > +	 * want pci interfering.
> > +	 */
> > +	pci_save_state(pdev);
> > +unfreeze:
> > +	nvme_unfreeze(ctrl);
> > +	return ret;
> > +}
> > +
> > +static int nvme_simple_suspend(struct device *dev)
> > +{
> > +	struct nvme_dev *ndev = pci_get_drvdata(to_pci_dev(dev));
> >
> >  	nvme_dev_disable(ndev, true);
> >  	return 0;
> >  }
> >
> > -static int nvme_resume(struct device *dev)
> > +static int nvme_simple_resume(struct device *dev)
> >  {
> >  	struct pci_dev *pdev = to_pci_dev(dev);
> >  	struct nvme_dev *ndev = pci_get_drvdata(pdev);
> > @@ -2846,9 +2926,19 @@ static int nvme_resume(struct device *dev)
> >  	nvme_reset_ctrl(&ndev->ctrl);
> >  	return 0;
> >  }
> > -#endif
> >
> > -static SIMPLE_DEV_PM_OPS(nvme_dev_pm_ops, nvme_suspend,
> nvme_resume);
> > +const struct dev_pm_ops nvme_dev_pm_ops = {
> > +	.suspend	= nvme_suspend,
> > +	.resume		= nvme_resume,
> > +	.freeze		= nvme_simple_suspend,
> > +	.thaw		= nvme_simple_resume,
> > +	.poweroff	= nvme_simple_suspend,
> > +	.restore	= nvme_simple_resume,
> > +};
> > +
> > +#else
> > +#define nvme_dev_pm_ops		NULL
> > +#endif
> >
> >  static pci_ers_result_t nvme_error_detected(struct pci_dev *pdev,
> >  						pci_channel_state_t state)
> > --
> > 2.14.4
> 

^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCHv3 2/2] nvme-pci: Use host managed power state for suspend
  2019-05-23 15:27 ` [PATCHv3 2/2] nvme-pci: Use host managed power state for suspend Keith Busch
  2019-05-24  5:42   ` Kai-Heng Feng
@ 2019-05-30  9:09   ` Rafael J. Wysocki
  2019-06-01  9:09   ` Christoph Hellwig
  2 siblings, 0 replies; 8+ messages in thread
From: Rafael J. Wysocki @ 2019-05-30  9:09 UTC (permalink / raw)


On Thu, May 23, 2019@5:32 PM Keith Busch <keith.busch@intel.com> wrote:
>
> The nvme pci driver prepares its devices for power loss during suspend
> by shutting down the controllers. The power setting is deferred to
> pci driver's power management before the platform removes power. The
> suspend-to-idle mode, however, does not remove power.
>
> NVMe devices that implement host managed power settings can achieve
> lower power and better transition latencies than using generic PCI power
> settings. Try to use this feature if the platform is not involved with
> the suspend. If successful, restore the previous power state on resume.
>
> Cc: Mario Limonciello <Mario.Limonciello at dell.com>
> Cc: Kai Heng Feng <kai.heng.feng at canonical.com>
> Signed-off-by: Keith Busch <keith.busch at intel.com>

No concerns from me:

Reviewed-by: Rafael J. Wysocki <rafael.j.wysocki at intel.com>

> ---
> v2 -> v3:
>
>   Removed the HMB handling. We've determined this is not be
>   necessary in a s2i state.
>
>   Splitting PM ops (Rafael)
>
>   Incorporated improvements from Christoph
>
>  drivers/nvme/host/pci.c | 96 +++++++++++++++++++++++++++++++++++++++++++++++--
>  1 file changed, 93 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
> index 599065ed6a32..47da55abb56b 100644
> --- a/drivers/nvme/host/pci.c
> +++ b/drivers/nvme/host/pci.c
> @@ -18,6 +18,7 @@
>  #include <linux/mutex.h>
>  #include <linux/once.h>
>  #include <linux/pci.h>
> +#include <linux/suspend.h>
>  #include <linux/t10-pi.h>
>  #include <linux/types.h>
>  #include <linux/io-64-nonatomic-lo-hi.h>
> @@ -116,6 +117,7 @@ struct nvme_dev {
>         u32 cmbsz;
>         u32 cmbloc;
>         struct nvme_ctrl ctrl;
> +       u32 last_ps;
>
>         mempool_t *iod_mempool;
>
> @@ -2829,16 +2831,94 @@ static void nvme_remove(struct pci_dev *pdev)
>  }
>
>  #ifdef CONFIG_PM_SLEEP
> +static int nvme_get_power_state(struct nvme_ctrl *ctrl, u32 *ps)
> +{
> +       return nvme_get_features(ctrl, NVME_FEAT_POWER_MGMT, 0, NULL, 0, ps);
> +}
> +
> +static int nvme_set_power_state(struct nvme_ctrl *ctrl, u32 ps)
> +{
> +       return nvme_set_features(ctrl, NVME_FEAT_POWER_MGMT, ps, NULL, 0, NULL);
> +}
> +
> +static int nvme_resume(struct device *dev)
> +{
> +       struct nvme_dev *ndev = pci_get_drvdata(to_pci_dev(dev));
> +       struct nvme_ctrl *ctrl = &ndev->ctrl;
> +
> +       if (pm_resume_via_firmware() || !ctrl->npss ||
> +           nvme_set_power_state(ctrl, ndev->last_ps) != 0)
> +               nvme_reset_ctrl(ctrl);
> +       return 0;
> +}
> +
>  static int nvme_suspend(struct device *dev)
>  {
>         struct pci_dev *pdev = to_pci_dev(dev);
>         struct nvme_dev *ndev = pci_get_drvdata(pdev);
> +       struct nvme_ctrl *ctrl = &ndev->ctrl;
> +       int ret = -EBUSY;
> +
> +       /*
> +        * The platform does not remove power for a kernel managed suspend so
> +        * use host managed nvme power settings for lowest idle power if
> +        * possible. This should have quicker resume latency than a full device
> +        * shutdown.  But if the firmware is involved after the suspend or the
> +        * device does not support any non-default power states, shut down the
> +        * device fully.
> +        */
> +       if (pm_suspend_via_firmware() || !ctrl->npss) {
> +               nvme_dev_disable(ndev, true);
> +               return 0;
> +       }
> +
> +       nvme_start_freeze(ctrl);
> +       nvme_wait_freeze(ctrl);
> +       nvme_sync_queues(ctrl);
> +
> +       if (ctrl->state != NVME_CTRL_LIVE &&
> +           ctrl->state != NVME_CTRL_ADMIN_ONLY)
> +               goto unfreeze;
> +
> +       ndev->last_ps = 0;
> +       ret = nvme_get_power_state(ctrl, &ndev->last_ps);
> +       if (ret < 0)
> +               goto unfreeze;
> +
> +       ret = nvme_set_power_state(ctrl, ctrl->npss);
> +       if (ret < 0)
> +               goto unfreeze;
> +
> +       if (ret) {
> +               /*
> +                * Clearing npss forces a controller reset on resume. The
> +                * correct value will be resdicovered then.
> +                */
> +               nvme_dev_disable(ndev, true);
> +               ctrl->npss = 0;
> +               ret = 0;
> +               goto unfreeze;
> +       }
> +       /*
> +        * A saved state prevents pci pm from generically controlling the
> +        * device's power. If we're using protocol specific settings, we don't
> +        * want pci interfering.
> +        */
> +       pci_save_state(pdev);
> +unfreeze:
> +       nvme_unfreeze(ctrl);
> +       return ret;
> +}
> +
> +static int nvme_simple_suspend(struct device *dev)
> +{
> +       struct nvme_dev *ndev = pci_get_drvdata(to_pci_dev(dev));
>
>         nvme_dev_disable(ndev, true);
>         return 0;
>  }
>
> -static int nvme_resume(struct device *dev)
> +static int nvme_simple_resume(struct device *dev)
>  {
>         struct pci_dev *pdev = to_pci_dev(dev);
>         struct nvme_dev *ndev = pci_get_drvdata(pdev);
> @@ -2846,9 +2926,19 @@ static int nvme_resume(struct device *dev)
>         nvme_reset_ctrl(&ndev->ctrl);
>         return 0;
>  }
> -#endif
>
> -static SIMPLE_DEV_PM_OPS(nvme_dev_pm_ops, nvme_suspend, nvme_resume);
> +const struct dev_pm_ops nvme_dev_pm_ops = {
> +       .suspend        = nvme_suspend,
> +       .resume         = nvme_resume,
> +       .freeze         = nvme_simple_suspend,
> +       .thaw           = nvme_simple_resume,
> +       .poweroff       = nvme_simple_suspend,
> +       .restore        = nvme_simple_resume,
> +};
> +
> +#else
> +#define nvme_dev_pm_ops                NULL
> +#endif
>
>  static pci_ers_result_t nvme_error_detected(struct pci_dev *pdev,
>                                                 pci_channel_state_t state)
> --
> 2.14.4
>

^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCHv3 2/2] nvme-pci: Use host managed power state for suspend
  2019-05-23 15:27 ` [PATCHv3 2/2] nvme-pci: Use host managed power state for suspend Keith Busch
  2019-05-24  5:42   ` Kai-Heng Feng
  2019-05-30  9:09   ` Rafael J. Wysocki
@ 2019-06-01  9:09   ` Christoph Hellwig
  2 siblings, 0 replies; 8+ messages in thread
From: Christoph Hellwig @ 2019-06-01  9:09 UTC (permalink / raw)


Looks good,

Reviewed-by: Christoph Hellwig <hch at lst.de>

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2019-06-01  9:09 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-05-23 15:27 [PATCHv3 1/2] nvme: Export get and set features Keith Busch
2019-05-23 15:27 ` [PATCHv3 2/2] nvme-pci: Use host managed power state for suspend Keith Busch
2019-05-24  5:42   ` Kai-Heng Feng
2019-05-24 15:14     ` Mario.Limonciello
2019-05-30  9:09   ` Rafael J. Wysocki
2019-06-01  9:09   ` Christoph Hellwig
2019-05-23 15:36 ` [PATCHv3 1/2] nvme: Export get and set features Christoph Hellwig
2019-05-23 15:42   ` Keith Busch

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.