* [PATCH 0/5] ahci: nvme remap support
From: Dan Williams @ 2016-10-22  0:25 UTC (permalink / raw)
  To: tj; +Cc: keith.busch, linux-ide, hch, linux-nvme

Some Intel AHCI implementations have the capability to expose another
PCI Express device's memory resources through an AHCI memory BAR. Add
support to detect these configurations and register the resources for
the nvme driver to consume. Otherwise, the NVMe device is effectively
hidden from the kernel in this configuration.
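
For reference, a minimal standalone sketch of the register layout the
series relies on. The constants are taken from patch 1; the program is
purely illustrative and not part of the series:

#include <stdio.h>
#include <stdint.h>

/* constants as used by patch 1 in drivers/ata/ahci.c */
#define AHCI_REMAP_CAP       0x800        /* bitmap of remapped devices */
#define AHCI_REMAP_N_DCC     0x880        /* per-device class code */
#define AHCI_REMAP_N_OFFSET  (16 * 1024)  /* first remap window */
#define AHCI_REMAP_N_SIZE    (16 * 1024)  /* size of each window */

/* offset of remapped device i's class-code register in the AHCI BAR */
static uint32_t ahci_remap_dcc(int i)
{
	return AHCI_REMAP_N_DCC + i * 0x80;
}

/* offset of remapped device i's MMIO window in the AHCI BAR */
static uint32_t ahci_remap_base(int i)
{
	return AHCI_REMAP_N_OFFSET + i * AHCI_REMAP_N_SIZE;
}

int main(void)
{
	int i;

	for (i = 0; i < 3; i++)
		printf("remap %d: dcc @ 0x%03x, window @ 0x%05x\n",
		       i, ahci_remap_dcc(i), ahci_remap_base(i));
	return 0;
}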

---

Dan Williams (5):
      ahci: nvme remap support
      nvme: rename "pci" operations to "mmio"
      nvme: introduce nvme_dev_ops
      nvme: move common definitions to pci.h
      nvme: ahci remap support

 drivers/ata/Kconfig        |   11 +
 drivers/ata/ahci.c         |  165 +++++++++++++++++++++
 drivers/ata/ahci.h         |   14 ++
 drivers/nvme/host/Makefile |    2 
 drivers/nvme/host/ahci.c   |  198 +++++++++++++++++++++++++
 drivers/nvme/host/pci.c    |  344 +++++++++++++++++++++++---------------------
 drivers/nvme/host/pci.h    |   98 +++++++++++++
 7 files changed, 664 insertions(+), 168 deletions(-)
 create mode 100644 drivers/nvme/host/ahci.c
 create mode 100644 drivers/nvme/host/pci.h

* [PATCH 1/5] ahci: nvme remap support
From: Dan Williams @ 2016-10-22  0:25 UTC (permalink / raw)
  To: tj; +Cc: keith.busch, linux-ide, hch, linux-nvme

Some Intel AHCI implementations have the capability to expose another
PCI Express device's memory resources through an AHCI memory BAR. Add
support to detect these configurations and register the resources for
the nvme driver to consume. Otherwise, the NVMe device is effectively
hidden from the kernel in this configuration.
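
As an illustration of the platform_device contract, a consumer of the
"ahci_nvme" devices that ahci_remap_register_devices() creates could
claim the MMIO window and shared interrupt roughly as below. This is
only a sketch (the real consumer is added later in the series), not
code from this patch:

#include <linux/module.h>
#include <linux/ioport.h>
#include <linux/platform_device.h>

static int ahci_nvme_example_probe(struct platform_device *pdev)
{
	struct resource *mem;
	int irq;

	mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
	irq = platform_get_irq(pdev, 0);
	if (!mem || irq < 0)
		return -ENODEV;

	dev_info(&pdev->dev, "remapped nvme at %pR, irq %d\n", mem, irq);
	return 0;
}

static struct platform_driver ahci_nvme_example_driver = {
	.probe = ahci_nvme_example_probe,
	.driver = {
		.name = "ahci_nvme",
	},
};
module_platform_driver(ahci_nvme_example_driver);
MODULE_LICENSE("GPL");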

Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/ata/Kconfig |   10 +++
 drivers/ata/ahci.c  |  165 +++++++++++++++++++++++++++++++++++++++++++++++++++
 drivers/ata/ahci.h  |   14 ++++
 3 files changed, 188 insertions(+), 1 deletion(-)

diff --git a/drivers/ata/Kconfig b/drivers/ata/Kconfig
index 2c8be74f401d..b9e46f2c69c1 100644
--- a/drivers/ata/Kconfig
+++ b/drivers/ata/Kconfig
@@ -90,6 +90,16 @@ config SATA_AHCI
 
 	  If unsure, say N.
 
+config SATA_AHCI_NVME
+	tristate "AHCI: Remapped NVMe support"
+	depends on SATA_AHCI
+	depends on BLK_DEV_NVME
+	help
+	  Support discovering remapped NVMe devices that appear in AHCI
+	  PCI memory space.
+
+	  If unsure, say N.
+
 config SATA_AHCI_PLATFORM
 	tristate "Platform AHCI SATA support"
 	help
diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c
index ba5f11cebee2..cc2c81ae497a 100644
--- a/drivers/ata/ahci.c
+++ b/drivers/ata/ahci.c
@@ -39,6 +39,7 @@
 #include <linux/delay.h>
 #include <linux/interrupt.h>
 #include <linux/dma-mapping.h>
+#include <linux/platform_device.h>
 #include <linux/device.h>
 #include <linux/dmi.h>
 #include <linux/gfp.h>
@@ -46,6 +47,7 @@
 #include <scsi/scsi_host.h>
 #include <scsi/scsi_cmnd.h>
 #include <linux/libata.h>
+#include <linux/io-64-nonatomic-lo-hi.h>
 #include "ahci.h"
 
 #define DRV_NAME	"ahci"
@@ -1400,6 +1402,81 @@ static irqreturn_t ahci_thunderx_irq_handler(int irq, void *dev_instance)
 }
 #endif
 
+enum {
+	AHCI_VSCAP = 0xa4,
+	AHCI_REMAP_CAP = 0x800,
+	PCI_CLASS_STORAGE_EXPRESS = 0x10802,
+	/* device class code */
+	AHCI_REMAP_N_DCC = 0x880,
+	/* remap-device base relative to ahci-bar */
+	AHCI_REMAP_N_OFFSET = SZ_16K,
+	AHCI_REMAP_N_SIZE = SZ_16K,
+};
+
+static unsigned int ahci_remap_dcc(int i)
+{
+	return AHCI_REMAP_N_DCC + i * 0x80;
+}
+
+static unsigned int ahci_remap_base(int i)
+{
+	return AHCI_REMAP_N_OFFSET + i * AHCI_REMAP_N_SIZE;
+}
+
+static int ahci_remap_init(struct pci_dev *pdev, int bar,
+		struct ahci_host_priv *hpriv)
+{
+	int i, count = 0;
+	u32 cap;
+
+	/*
+	 * Check if remapped nvme devices might be present and if so
+	 * register platform resources.
+	 */
+	if (IS_ENABLED(CONFIG_SATA_AHCI_NVME)
+			&& pdev->vendor == PCI_VENDOR_ID_INTEL
+			&& pci_resource_len(pdev, bar) == SZ_512K
+			&& bar == AHCI_PCI_BAR_STANDARD
+			&& (readl(hpriv->mmio + AHCI_VSCAP) & 1))
+		/* pass */;
+	else
+		return -ENODEV;
+
+	cap = readq(hpriv->mmio + AHCI_REMAP_CAP);
+	for (i = 0; i < AHCI_MAX_REMAP; i++) {
+		struct ahci_remap *rdev;
+
+		if ((cap & (1 << i)) == 0)
+			continue;
+		if (readl(hpriv->mmio + ahci_remap_dcc(i))
+				!= PCI_CLASS_STORAGE_EXPRESS)
+			continue;
+
+		rdev = devm_kzalloc(&pdev->dev, sizeof(*rdev), GFP_KERNEL);
+		if (!rdev)
+			return -ENOMEM;
+
+		rdev->id = i;
+		rdev->mem.start = pci_resource_start(pdev, bar)
+			+ ahci_remap_base(i);
+		rdev->mem.end = rdev->mem.start + AHCI_REMAP_N_SIZE - 1;
+		rdev->mem.flags = IORESOURCE_MEM;
+
+		/*
+		 * This will be translated to kernel irq vector after
+		 * ahci irq initialization.
+		 */
+		rdev->irq.start = 0;
+		rdev->irq.end = 0;
+		rdev->irq.flags = IORESOURCE_IRQ;
+
+		hpriv->remap[i] = rdev;
+		count++;
+	}
+
+	return count > 0 ? 0 : -ENODEV;
+}
+
 static int ahci_get_irq_vector(struct ata_host *host, int port)
 {
 	return pci_irq_vector(to_pci_dev(host->dev), port);
@@ -1410,7 +1487,7 @@ static int ahci_init_msi(struct pci_dev *pdev, unsigned int n_ports,
 {
 	int nvec;
 
-	if (hpriv->flags & AHCI_HFLAG_NO_MSI)
+	if (hpriv->flags & (AHCI_HFLAG_NO_MSI | AHCI_HFLAG_REMAP))
 		return -ENODEV;
 
 	/*
@@ -1452,6 +1529,58 @@ static int ahci_init_msi(struct pci_dev *pdev, unsigned int n_ports,
 	return pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_MSIX);
 }
 
+static void ahci_remap_restore(void *data)
+{
+	struct pci_dev *pdev = data;
+	int bar = AHCI_PCI_BAR_STANDARD;
+
+	pci_resource_end(pdev, bar) = pci_resource_start(pdev, bar) + SZ_512K - 1;
+}
+
+static void ahci_remap_unregister(void *data)
+{
+	struct platform_device *pdev = data;
+
+	platform_device_del(pdev);
+	put_device(&pdev->dev);
+}
+
+static void ahci_remap_register_devices(struct device *dev,
+		struct ahci_host_priv *hpriv)
+{
+	struct platform_device *pdev;
+	int i;
+
+	if ((hpriv->flags & AHCI_HFLAG_REMAP) == 0)
+		return;
+
+	for (i = 0; i < AHCI_MAX_REMAP; i++) {
+		struct ahci_remap *rdev = hpriv->remap[i];
+
+		if (!rdev)
+			continue;
+		pdev = platform_device_alloc("ahci_nvme", rdev->id);
+		if (!pdev)
+			continue;
+		if (platform_device_add_resources(pdev, &rdev->mem, 2) != 0) {
+			put_device(&pdev->dev);
+			continue;
+		}
+
+		pdev->dev.parent = dev;
+		if (platform_device_add(pdev) != 0) {
+			put_device(&pdev->dev);
+			continue;
+		}
+
+		if (devm_add_action_or_reset(dev, ahci_remap_unregister,
+					pdev) != 0)
+			continue;
+		dev_info(dev, "remap: %s %pR %pR\n", dev_name(&pdev->dev),
+				&rdev->mem, &rdev->irq);
+	}
+}
+
 static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 {
 	unsigned int board_id = ent->driver_data;
@@ -1545,6 +1674,23 @@ static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 
 	hpriv->mmio = pcim_iomap_table(pdev)[ahci_pci_bar];
 
+	/* detect remapped nvme devices */
+	if (ahci_remap_init(pdev, ahci_pci_bar, hpriv) == 0) {
+		pcim_iounmap_regions(pdev, 1 << ahci_pci_bar);
+		pci_resource_end(pdev, ahci_pci_bar)
+			= pci_resource_start(pdev, ahci_pci_bar) + SZ_16K - 1;
+		rc = devm_add_action_or_reset(dev, ahci_remap_restore, pdev);
+		if (rc)
+			return rc;
+		rc = pcim_iomap_regions(pdev, 1 << ahci_pci_bar, DRV_NAME);
+		if (rc == -EBUSY)
+			pcim_pin_device(pdev);
+		if (rc)
+			return rc;
+		hpriv->mmio = pcim_iomap_table(pdev)[ahci_pci_bar];
+		hpriv->flags |= AHCI_HFLAG_REMAP;
+	}
+
 	/* must set flag prior to save config in order to take effect */
 	if (ahci_broken_devslp(pdev))
 		hpriv->flags |= AHCI_HFLAG_NO_DEVSLP;
@@ -1616,6 +1762,21 @@ static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 	if (ahci_init_msi(pdev, n_ports, hpriv) < 0) {
 		/* legacy intx interrupts */
 		pci_intx(pdev, 1);
+
+		/*
+		 * Don't rely on the msi-x capability in the remap case,
+		 * share the legacy interrupt across ahci and remapped
+		 * devices.
+		 */
+		if (hpriv->flags & AHCI_HFLAG_REMAP)
+			for (i = 0; i < AHCI_MAX_REMAP; i++) {
+				struct ahci_remap *rdev = hpriv->remap[i];
+
+				if (!rdev || resource_size(&rdev->irq) < 1)
+					continue;
+				rdev->irq.start = pdev->irq;
+				rdev->irq.end = rdev->irq.start;
+			}
 	}
 	hpriv->irq = pdev->irq;
 
@@ -1668,6 +1829,8 @@ static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 	if (rc)
 		return rc;
 
+	ahci_remap_register_devices(&pdev->dev, hpriv);
+
 	pm_runtime_put_noidle(&pdev->dev);
 	return 0;
 }
diff --git a/drivers/ata/ahci.h b/drivers/ata/ahci.h
index 0cc08f892fea..859d08979497 100644
--- a/drivers/ata/ahci.h
+++ b/drivers/ata/ahci.h
@@ -248,6 +248,8 @@ enum {
 	AHCI_HFLAG_MULTI_MSI		= 0,
 #endif
 	AHCI_HFLAG_WAKE_BEFORE_STOP	= (1 << 22), /* wake before DMA stop */
+	AHCI_HFLAG_REMAP		= (1 << 23),
+	AHCI_MAX_REMAP			= 3,
 
 	/* ap->flags bits */
 
@@ -324,6 +326,17 @@ struct ahci_port_priv {
 	char			*irq_desc;	/* desc in /proc/interrupts */
 };
 
+struct ahci_remap {
+	int id;
+	/*
+	 * @mem and @irq must be consecutive for
+	 * platform_device_add_resources() in
+	 * ahci_remap_register_devices()
+	 */
+	struct resource mem;
+	struct resource irq;
+};
+
 struct ahci_host_priv {
 	/* Input fields */
 	unsigned int		flags;		/* AHCI_HFLAG_* */
@@ -352,6 +365,7 @@ struct ahci_host_priv {
 	unsigned		nports;		/* Number of ports */
 	void			*plat_data;	/* Other platform data */
 	unsigned int		irq;		/* interrupt line */
+	struct ahci_remap	*remap[AHCI_MAX_REMAP]; /* remapped devices */
 	/*
 	 * Optional ahci_start_engine override, if not set this gets set to the
 	 * default ahci_start_engine during ahci_save_initial_config, this can


* [PATCH 2/5] nvme: rename "pci" operations to "mmio"
From: Dan Williams @ 2016-10-22  0:25 UTC (permalink / raw)
  To: tj; +Cc: keith.busch, linux-ide, hch, linux-nvme

In preparation for adding a platform_device nvme host, rename the
"pci"-prefixed controller operations to a more generic "mmio" prefix.

Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/nvme/host/pci.c |   30 +++++++++++++++---------------
 1 file changed, 15 insertions(+), 15 deletions(-)

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 0fc99f0f2571..8c4330b95be8 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -771,7 +771,7 @@ static int nvme_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag)
 	return 0;
 }
 
-static void nvme_pci_submit_async_event(struct nvme_ctrl *ctrl, int aer_idx)
+static void nvme_mmio_submit_async_event(struct nvme_ctrl *ctrl, int aer_idx)
 {
 	struct nvme_dev *dev = to_nvme_dev(ctrl);
 	struct nvme_queue *nvmeq = dev->queues[0];
@@ -1704,7 +1704,7 @@ static void nvme_release_prp_pools(struct nvme_dev *dev)
 	dma_pool_destroy(dev->prp_small_pool);
 }
 
-static void nvme_pci_free_ctrl(struct nvme_ctrl *ctrl)
+static void nvme_mmio_free_ctrl(struct nvme_ctrl *ctrl)
 {
 	struct nvme_dev *dev = to_nvme_dev(ctrl);
 
@@ -1826,38 +1826,38 @@ static int nvme_reset(struct nvme_dev *dev)
 	return 0;
 }
 
-static int nvme_pci_reg_read32(struct nvme_ctrl *ctrl, u32 off, u32 *val)
+static int nvme_mmio_reg_read32(struct nvme_ctrl *ctrl, u32 off, u32 *val)
 {
 	*val = readl(to_nvme_dev(ctrl)->bar + off);
 	return 0;
 }
 
-static int nvme_pci_reg_write32(struct nvme_ctrl *ctrl, u32 off, u32 val)
+static int nvme_mmio_reg_write32(struct nvme_ctrl *ctrl, u32 off, u32 val)
 {
 	writel(val, to_nvme_dev(ctrl)->bar + off);
 	return 0;
 }
 
-static int nvme_pci_reg_read64(struct nvme_ctrl *ctrl, u32 off, u64 *val)
+static int nvme_mmio_reg_read64(struct nvme_ctrl *ctrl, u32 off, u64 *val)
 {
 	*val = readq(to_nvme_dev(ctrl)->bar + off);
 	return 0;
 }
 
-static int nvme_pci_reset_ctrl(struct nvme_ctrl *ctrl)
+static int nvme_mmio_reset_ctrl(struct nvme_ctrl *ctrl)
 {
 	return nvme_reset(to_nvme_dev(ctrl));
 }
 
-static const struct nvme_ctrl_ops nvme_pci_ctrl_ops = {
-	.name			= "pcie",
+static const struct nvme_ctrl_ops nvme_mmio_ctrl_ops = {
+	.name			= "mmio",
 	.module			= THIS_MODULE,
-	.reg_read32		= nvme_pci_reg_read32,
-	.reg_write32		= nvme_pci_reg_write32,
-	.reg_read64		= nvme_pci_reg_read64,
-	.reset_ctrl		= nvme_pci_reset_ctrl,
-	.free_ctrl		= nvme_pci_free_ctrl,
-	.submit_async_event	= nvme_pci_submit_async_event,
+	.reg_read32		= nvme_mmio_reg_read32,
+	.reg_write32		= nvme_mmio_reg_write32,
+	.reg_read64		= nvme_mmio_reg_read64,
+	.reset_ctrl		= nvme_mmio_reset_ctrl,
+	.free_ctrl		= nvme_mmio_free_ctrl,
+	.submit_async_event	= nvme_mmio_submit_async_event,
 };
 
 static int nvme_dev_map(struct nvme_dev *dev)
@@ -1912,7 +1912,7 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	if (result)
 		goto put_pci;
 
-	result = nvme_init_ctrl(&dev->ctrl, &pdev->dev, &nvme_pci_ctrl_ops,
+	result = nvme_init_ctrl(&dev->ctrl, &pdev->dev, &nvme_mmio_ctrl_ops,
 			id->driver_data);
 	if (result)
 		goto release_pools;


* [PATCH 3/5] nvme: introduce nvme_dev_ops
From: Dan Williams @ 2016-10-22  0:25 UTC (permalink / raw)
  To: tj; +Cc: keith.busch, linux-ide, hch, linux-nvme

In preparation for a platform device nvme driver, move the bus-specific
portions of nvme behind a new nvme_dev_ops, and rewrite the remaining
routines to use a generic 'struct device' instead of 'struct pci_dev'.
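
To make the split concrete, a hypothetical non-PCI back-end for the new
nvme_dev_ops could look roughly like the sketch below. The names and
trivial return values are illustrative only (the real remap consumer
arrives later in the series), and it assumes nvme_enable() and the ops
structure are visible to the back-end:

/*
 * Hypothetical, minimal non-PCI nvme_dev_ops back-end (sketch only);
 * assumes the usual pci.c includes plus <linux/platform_device.h>.
 */
static int nvme_plat_enable(struct nvme_dev *dev)
{
	/* no config space to program; read CAP/CSTS through dev->bar */
	return nvme_enable(dev);
}

static void nvme_plat_disable(struct nvme_dev *dev)
{
	/* nothing bus-specific to tear down */
}

static int nvme_plat_q_irq(struct nvme_queue *nvmeq)
{
	/* every queue shares the platform device's only interrupt */
	return platform_get_irq(to_platform_device(nvmeq->dev->dev), 0);
}

static int nvme_plat_map_irq(struct nvme_dev *dev, int nr_io_queues)
{
	/* mirror the PCI back-end: drop the admin irq, it is re-requested */
	free_irq(nvme_plat_q_irq(dev->queues[0]), dev->queues[0]);
	/* a single shared interrupt line, so expose one I/O queue */
	return 1;
}

static int nvme_plat_is_enabled(struct nvme_dev *dev)  { return 1; }
static int nvme_plat_is_offline(struct nvme_dev *dev)  { return 0; }
static bool nvme_plat_is_present(struct nvme_dev *dev) { return true; }

static const struct nvme_dev_ops nvme_plat_dev_ops = {
	.enable		= nvme_plat_enable,
	.disable	= nvme_plat_disable,
	.map_irq	= nvme_plat_map_irq,
	.q_irq		= nvme_plat_q_irq,
	.is_enabled	= nvme_plat_is_enabled,
	.is_offline	= nvme_plat_is_offline,
	.is_present	= nvme_plat_is_present,
};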

Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/nvme/host/pci.c |  258 ++++++++++++++++++++++++++++++-----------------
 1 file changed, 165 insertions(+), 93 deletions(-)

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 8c4330b95be8..ea1c623ed257 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -73,6 +73,16 @@ static int nvme_reset(struct nvme_dev *dev);
 static void nvme_process_cq(struct nvme_queue *nvmeq);
 static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown);
 
+struct nvme_dev_ops {
+	int (*enable)(struct nvme_dev *dev);
+	void (*disable)(struct nvme_dev *dev);
+	int (*map_irq)(struct nvme_dev *dev, int nr_io_queues);
+	int (*q_irq)(struct nvme_queue *q);
+	int (*is_enabled)(struct nvme_dev *dev);
+	int (*is_offline)(struct nvme_dev *dev);
+	bool (*is_present)(struct nvme_dev *dev);
+};
+
 /*
  * Represents an NVM Express device.  Each nvme_dev is a PCI function.
  */
@@ -101,6 +111,8 @@ struct nvme_dev {
 	u32 cmbsz;
 	struct nvme_ctrl ctrl;
 	struct completion ioq_wait;
+	const struct resource *res;
+	const struct nvme_dev_ops *ops;
 };
 
 static inline struct nvme_dev *to_nvme_dev(struct nvme_ctrl *ctrl)
@@ -201,7 +213,7 @@ static unsigned int nvme_cmd_size(struct nvme_dev *dev)
 		nvme_iod_alloc_size(dev, NVME_INT_BYTES(dev), NVME_INT_PAGES);
 }
 
-static int nvmeq_irq(struct nvme_queue *nvmeq)
+static int nvme_pci_q_irq(struct nvme_queue *nvmeq)
 {
 	return pci_irq_vector(to_pci_dev(nvmeq->dev->dev), nvmeq->cq_vector);
 }
@@ -973,7 +985,7 @@ static int nvme_suspend_queue(struct nvme_queue *nvmeq)
 		spin_unlock_irq(&nvmeq->q_lock);
 		return 1;
 	}
-	vector = nvmeq_irq(nvmeq);
+	vector = nvmeq->dev->ops->q_irq(nvmeq);
 	nvmeq->dev->online_queues--;
 	nvmeq->cq_vector = -1;
 	spin_unlock_irq(&nvmeq->q_lock);
@@ -1089,12 +1101,15 @@ static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid,
 
 static int queue_request_irq(struct nvme_queue *nvmeq)
 {
+	struct nvme_dev *dev = nvmeq->dev;
+
 	if (use_threaded_interrupts)
-		return request_threaded_irq(nvmeq_irq(nvmeq), nvme_irq_check,
-				nvme_irq, IRQF_SHARED, nvmeq->irqname, nvmeq);
-	else
-		return request_irq(nvmeq_irq(nvmeq), nvme_irq, IRQF_SHARED,
+		return request_threaded_irq(dev->ops->q_irq(nvmeq),
+				nvme_irq_check, nvme_irq, IRQF_SHARED,
 				nvmeq->irqname, nvmeq);
+	else
+		return request_irq(dev->ops->q_irq(nvmeq), nvme_irq,
+				IRQF_SHARED, nvmeq->irqname, nvmeq);
 }
 
 static void nvme_init_queue(struct nvme_queue *nvmeq, u16 qid)
@@ -1278,7 +1293,7 @@ static bool nvme_should_reset(struct nvme_dev *dev, u32 csts)
 	/* If PCI error recovery process is happening, we cannot reset or
 	 * the recovery mechanism will surely fail.
 	 */
-	if (pci_channel_offline(to_pci_dev(dev->dev)))
+	if (dev->ops->is_offline(dev))
 		return false;
 
 	return true;
@@ -1331,7 +1346,7 @@ static int nvme_create_io_queues(struct nvme_dev *dev)
 	return ret >= 0 ? 0 : ret;
 }
 
-static void __iomem *nvme_map_cmb(struct nvme_dev *dev)
+static void __iomem *nvme_pci_map_cmb(struct nvme_dev *dev)
 {
 	u64 szu, size, offset;
 	u32 cmbloc;
@@ -1388,10 +1403,27 @@ static size_t db_bar_size(struct nvme_dev *dev, unsigned nr_io_queues)
 	return 4096 + ((nr_io_queues + 1) * 8 * dev->db_stride);
 }
 
+static int nvme_pci_map_irq(struct nvme_dev *dev, int nr_io_queues)
+{
+	struct pci_dev *pdev = to_pci_dev(dev->dev);
+	struct nvme_queue *adminq = dev->queues[0];
+
+	/* Deregister the admin queue's interrupt */
+	free_irq(pci_irq_vector(pdev, 0), adminq);
+
+	/*
+	 * If we enable msix early due to not intx, disable it again before
+	 * setting up the full range we need.
+	 */
+	pci_free_irq_vectors(pdev);
+	return pci_alloc_irq_vectors(pdev, 1, nr_io_queues,
+			PCI_IRQ_ALL_TYPES | PCI_IRQ_AFFINITY);
+}
+
 static int nvme_setup_io_queues(struct nvme_dev *dev)
 {
 	struct nvme_queue *adminq = dev->queues[0];
-	struct pci_dev *pdev = to_pci_dev(dev->dev);
+	struct device *ddev = dev->dev;
 	int result, nr_io_queues, size;
 
 	nr_io_queues = num_online_cpus();
@@ -1413,9 +1445,9 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
 
 	size = db_bar_size(dev, nr_io_queues);
 	if (size > 8192) {
-		iounmap(dev->bar);
+		devm_iounmap(ddev, dev->bar);
 		do {
-			dev->bar = ioremap(pci_resource_start(pdev, 0), size);
+			dev->bar = devm_ioremap(ddev, dev->res->start, size);
 			if (dev->bar)
 				break;
 			if (!--nr_io_queues)
@@ -1426,19 +1458,9 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
 		adminq->q_db = dev->dbs;
 	}
 
-	/* Deregister the admin queue's interrupt */
-	free_irq(pci_irq_vector(pdev, 0), adminq);
-
-	/*
-	 * If we enable msix early due to not intx, disable it again before
-	 * setting up the full range we need.
-	 */
-	pci_free_irq_vectors(pdev);
-	nr_io_queues = pci_alloc_irq_vectors(pdev, 1, nr_io_queues,
-			PCI_IRQ_ALL_TYPES | PCI_IRQ_AFFINITY);
-	if (nr_io_queues <= 0)
+	dev->max_qid = dev->ops->map_irq(dev, nr_io_queues);
+	if (dev->max_qid <= 0)
 		return -EIO;
-	dev->max_qid = nr_io_queues;
 
 	/*
 	 * Should investigate if there's a performance win from allocating
@@ -1570,9 +1592,24 @@ static int nvme_dev_add(struct nvme_dev *dev)
 	return 0;
 }
 
-static int nvme_pci_enable(struct nvme_dev *dev)
+static int nvme_enable(struct nvme_dev *dev)
 {
 	u64 cap;
+
+	if (readl(dev->bar + NVME_REG_CSTS) == -1)
+		return -ENODEV;
+
+	cap = lo_hi_readq(dev->bar + NVME_REG_CAP);
+
+	dev->q_depth = min_t(int, NVME_CAP_MQES(cap) + 1, NVME_Q_DEPTH);
+	dev->db_stride = 1 << NVME_CAP_STRIDE(cap);
+	dev->dbs = dev->bar + 4096;
+
+	return 0;
+}
+
+static int nvme_pci_enable(struct nvme_dev *dev)
+{
 	int result = -ENOMEM;
 	struct pci_dev *pdev = to_pci_dev(dev->dev);
 
@@ -1581,15 +1618,6 @@ static int nvme_pci_enable(struct nvme_dev *dev)
 
 	pci_set_master(pdev);
 
-	if (dma_set_mask_and_coherent(dev->dev, DMA_BIT_MASK(64)) &&
-	    dma_set_mask_and_coherent(dev->dev, DMA_BIT_MASK(32)))
-		goto disable;
-
-	if (readl(dev->bar + NVME_REG_CSTS) == -1) {
-		result = -ENODEV;
-		goto disable;
-	}
-
 	/*
 	 * Some devices and/or platforms don't advertise or work with INTx
 	 * interrupts. Pre-enable a single MSIX or MSI vec for setup. We'll
@@ -1599,11 +1627,13 @@ static int nvme_pci_enable(struct nvme_dev *dev)
 	if (result < 0)
 		return result;
 
-	cap = lo_hi_readq(dev->bar + NVME_REG_CAP);
+	if (dma_set_mask_and_coherent(dev->dev, DMA_BIT_MASK(64)) &&
+	    dma_set_mask_and_coherent(dev->dev, DMA_BIT_MASK(32)))
+		return -ENXIO;
 
-	dev->q_depth = min_t(int, NVME_CAP_MQES(cap) + 1, NVME_Q_DEPTH);
-	dev->db_stride = 1 << NVME_CAP_STRIDE(cap);
-	dev->dbs = dev->bar + 4096;
+	result = nvme_enable(dev);
+	if (result)
+		goto disable;
 
 	/*
 	 * Temporary fix for the Apple controller found in the MacBook8,1 and
@@ -1617,7 +1647,7 @@ static int nvme_pci_enable(struct nvme_dev *dev)
 	}
 
 	if (readl(dev->bar + NVME_REG_VS) >= NVME_VS(1, 2))
-		dev->cmb = nvme_map_cmb(dev);
+		dev->cmb = nvme_pci_map_cmb(dev);
 
 	pci_enable_pcie_error_reporting(pdev);
 	pci_save_state(pdev);
@@ -1628,13 +1658,6 @@ static int nvme_pci_enable(struct nvme_dev *dev)
 	return result;
 }
 
-static void nvme_dev_unmap(struct nvme_dev *dev)
-{
-	if (dev->bar)
-		iounmap(dev->bar);
-	pci_release_mem_regions(to_pci_dev(dev->dev));
-}
-
 static void nvme_pci_disable(struct nvme_dev *dev)
 {
 	struct pci_dev *pdev = to_pci_dev(dev->dev);
@@ -1647,6 +1670,21 @@ static void nvme_pci_disable(struct nvme_dev *dev)
 	}
 }
 
+static int nvme_pci_is_enabled(struct nvme_dev *dev)
+{
+	return pci_is_enabled(to_pci_dev(dev->dev));
+}
+
+static int nvme_pci_is_offline(struct nvme_dev *dev)
+{
+	return pci_channel_offline(to_pci_dev(dev->dev));
+}
+
+static bool nvme_pci_is_present(struct nvme_dev *dev)
+{
+	return pci_device_is_present(to_pci_dev(dev->dev));
+}
+
 static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
 {
 	int i;
@@ -1655,7 +1693,7 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
 	del_timer_sync(&dev->watchdog_timer);
 
 	mutex_lock(&dev->shutdown_lock);
-	if (pci_is_enabled(to_pci_dev(dev->dev))) {
+	if (dev->ops->is_enabled(dev)) {
 		nvme_stop_queues(&dev->ctrl);
 		csts = readl(dev->bar + NVME_REG_CSTS);
 	}
@@ -1674,7 +1712,7 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
 		nvme_disable_io_queues(dev);
 		nvme_disable_admin_queue(dev, shutdown);
 	}
-	nvme_pci_disable(dev);
+	dev->ops->disable(dev);
 
 	blk_mq_tagset_busy_iter(&dev->tagset, nvme_cancel_request, &dev->ctrl);
 	blk_mq_tagset_busy_iter(&dev->admin_tagset, nvme_cancel_request, &dev->ctrl);
@@ -1745,7 +1783,7 @@ static void nvme_reset_work(struct work_struct *work)
 	if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_RESETTING))
 		goto out;
 
-	result = nvme_pci_enable(dev);
+	result = dev->ops->enable(dev);
 	if (result)
 		goto out;
 
@@ -1806,11 +1844,11 @@ static void nvme_reset_work(struct work_struct *work)
 static void nvme_remove_dead_ctrl_work(struct work_struct *work)
 {
 	struct nvme_dev *dev = container_of(work, struct nvme_dev, remove_work);
-	struct pci_dev *pdev = to_pci_dev(dev->dev);
+	struct device *ddev = dev->dev;
 
 	nvme_kill_queues(&dev->ctrl);
-	if (pci_get_drvdata(pdev))
-		device_release_driver(&pdev->dev);
+	if (dev_get_drvdata(ddev))
+		device_release_driver(ddev);
 	nvme_put_ctrl(&dev->ctrl);
 }
 
@@ -1860,31 +1898,34 @@ static const struct nvme_ctrl_ops nvme_mmio_ctrl_ops = {
 	.submit_async_event	= nvme_mmio_submit_async_event,
 };
 
-static int nvme_dev_map(struct nvme_dev *dev)
-{
-	struct pci_dev *pdev = to_pci_dev(dev->dev);
-
-	if (pci_request_mem_regions(pdev, "nvme"))
-		return -ENODEV;
-
-	dev->bar = ioremap(pci_resource_start(pdev, 0), 8192);
-	if (!dev->bar)
-		goto release;
-
-       return 0;
-  release:
-       pci_release_mem_regions(pdev);
-       return -ENODEV;
-}
+static const struct nvme_dev_ops nvme_pci_dev_ops = {
+	.enable			= nvme_pci_enable,
+	.disable		= nvme_pci_disable,
+	.map_irq		= nvme_pci_map_irq,
+	.q_irq			= nvme_pci_q_irq,
+	.is_enabled		= nvme_pci_is_enabled,
+	.is_offline		= nvme_pci_is_offline,
+	.is_present		= nvme_pci_is_present,
+};
 
-static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+static int nvme_probe(struct device *ddev, struct resource *res,
+		const struct nvme_dev_ops *ops, unsigned long quirks)
 {
 	int node, result = -ENOMEM;
 	struct nvme_dev *dev;
 
-	node = dev_to_node(&pdev->dev);
+	if (!ops || !ops->enable
+		 || !ops->disable
+		 || !ops->map_irq
+		 || !ops->q_irq
+		 || !ops->is_enabled
+		 || !ops->is_offline
+		 || !ops->is_present)
+		return -EINVAL;
+
+	node = dev_to_node(ddev);
 	if (node == NUMA_NO_NODE)
-		set_dev_node(&pdev->dev, first_memory_node);
+		set_dev_node(ddev, first_memory_node);
 
 	dev = kzalloc_node(sizeof(*dev), GFP_KERNEL, node);
 	if (!dev)
@@ -1894,12 +1935,16 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	if (!dev->queues)
 		goto free;
 
-	dev->dev = get_device(&pdev->dev);
-	pci_set_drvdata(pdev, dev);
+	dev->ops = ops;
+	dev->res = res;
+	dev->dev = get_device(ddev);
+	dev_set_drvdata(ddev, dev);
 
-	result = nvme_dev_map(dev);
-	if (result)
+	dev->bar = devm_ioremap(ddev, dev->res->start, 8192);
+	if (!dev->bar) {
+		result = -ENODEV;
 		goto free;
+	}
 
 	INIT_WORK(&dev->reset_work, nvme_reset_work);
 	INIT_WORK(&dev->remove_work, nvme_remove_dead_ctrl_work);
@@ -1910,29 +1955,53 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 
 	result = nvme_setup_prp_pools(dev);
 	if (result)
-		goto put_pci;
+		goto put_dev;
 
-	result = nvme_init_ctrl(&dev->ctrl, &pdev->dev, &nvme_mmio_ctrl_ops,
-			id->driver_data);
+	result = nvme_init_ctrl(&dev->ctrl, ddev, &nvme_mmio_ctrl_ops,
+			quirks);
 	if (result)
 		goto release_pools;
 
-	dev_info(dev->ctrl.device, "pci function %s\n", dev_name(&pdev->dev));
+	dev_info(dev->ctrl.device, "%s function %s\n",
+			ddev->bus ? ddev->bus->name : "", dev_name(ddev));
 
 	queue_work(nvme_workq, &dev->reset_work);
 	return 0;
 
  release_pools:
 	nvme_release_prp_pools(dev);
- put_pci:
+ put_dev:
 	put_device(dev->dev);
-	nvme_dev_unmap(dev);
  free:
 	kfree(dev->queues);
 	kfree(dev);
 	return result;
 }
 
+static void nvme_pci_release_regions(void *data)
+{
+	struct pci_dev *pdev = data;
+
+	pci_release_mem_regions(pdev);
+}
+
+static int nvme_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+{
+	int rc;
+
+	rc = pci_request_mem_regions(pdev, "nvme");
+	if (rc)
+		return rc;
+
+	rc = devm_add_action_or_reset(&pdev->dev, nvme_pci_release_regions,
+			pdev);
+	if (rc)
+		return rc;
+
+	return nvme_probe(&pdev->dev, &pdev->resource[0], &nvme_pci_dev_ops,
+			id->driver_data);
+}
+
 static void nvme_reset_notify(struct pci_dev *pdev, bool prepare)
 {
 	struct nvme_dev *dev = pci_get_drvdata(pdev);
@@ -1943,9 +2012,10 @@ static void nvme_reset_notify(struct pci_dev *pdev, bool prepare)
 		queue_work(nvme_workq, &dev->reset_work);
 }
 
-static void nvme_shutdown(struct pci_dev *pdev)
+static void nvme_pci_shutdown(struct pci_dev *pdev)
 {
 	struct nvme_dev *dev = pci_get_drvdata(pdev);
+
 	nvme_dev_disable(dev, true);
 }
 
@@ -1954,15 +2024,15 @@ static void nvme_shutdown(struct pci_dev *pdev)
  * state. This function must not have any dependencies on the device state in
  * order to proceed.
  */
-static void nvme_remove(struct pci_dev *pdev)
+static void nvme_remove(struct device *ddev)
 {
-	struct nvme_dev *dev = pci_get_drvdata(pdev);
+	struct nvme_dev *dev = dev_get_drvdata(ddev);
 
 	nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DELETING);
 
-	pci_set_drvdata(pdev, NULL);
+	dev_set_drvdata(ddev, NULL);
 
-	if (!pci_device_is_present(pdev))
+	if (!dev->ops->is_present(dev))
 		nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DEAD);
 
 	flush_work(&dev->reset_work);
@@ -1972,10 +2042,14 @@ static void nvme_remove(struct pci_dev *pdev)
 	nvme_free_queues(dev, 0);
 	nvme_release_cmb(dev);
 	nvme_release_prp_pools(dev);
-	nvme_dev_unmap(dev);
 	nvme_put_ctrl(&dev->ctrl);
 }
 
+static void nvme_pci_remove(struct pci_dev *pdev)
+{
+	nvme_remove(&pdev->dev);
+}
+
 static int nvme_pci_sriov_configure(struct pci_dev *pdev, int numvfs)
 {
 	int ret = 0;
@@ -1997,8 +2071,7 @@ static int nvme_pci_sriov_configure(struct pci_dev *pdev, int numvfs)
 #ifdef CONFIG_PM_SLEEP
 static int nvme_suspend(struct device *dev)
 {
-	struct pci_dev *pdev = to_pci_dev(dev);
-	struct nvme_dev *ndev = pci_get_drvdata(pdev);
+	struct nvme_dev *ndev = dev_get_drvdata(dev);
 
 	nvme_dev_disable(ndev, true);
 	return 0;
@@ -2006,8 +2079,7 @@ static int nvme_suspend(struct device *dev)
 
 static int nvme_resume(struct device *dev)
 {
-	struct pci_dev *pdev = to_pci_dev(dev);
-	struct nvme_dev *ndev = pci_get_drvdata(pdev);
+	struct nvme_dev *ndev = dev_get_drvdata(dev);
 
 	queue_work(nvme_workq, &ndev->reset_work);
 	return 0;
@@ -2092,9 +2164,9 @@ MODULE_DEVICE_TABLE(pci, nvme_id_table);
 static struct pci_driver nvme_driver = {
 	.name		= "nvme",
 	.id_table	= nvme_id_table,
-	.probe		= nvme_probe,
-	.remove		= nvme_remove,
-	.shutdown	= nvme_shutdown,
+	.probe		= nvme_pci_probe,
+	.remove		= nvme_pci_remove,
+	.shutdown	= nvme_pci_shutdown,
 	.driver		= {
 		.pm	= &nvme_dev_pm_ops,
 	},


* [PATCH 4/5] nvme: move common definitions to pci.h
  2016-10-22  0:25 ` Dan Williams
@ 2016-10-22  0:25   ` Dan Williams
  -1 siblings, 0 replies; 40+ messages in thread
From: Dan Williams @ 2016-10-22  0:25 UTC (permalink / raw)
  To: tj; +Cc: keith.busch, linux-ide, hch, linux-nvme

A platform driver for nvme resources needs access to struct nvme_dev and
other definitions that are currently local to pci.c.

Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/nvme/host/pci.c |   69 +-----------------------------------
 drivers/nvme/host/pci.h |   89 +++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 90 insertions(+), 68 deletions(-)
 create mode 100644 drivers/nvme/host/pci.h

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index ea1c623ed257..418ccc1c0cf7 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -45,6 +45,7 @@
 #include <asm/unaligned.h>
 
 #include "nvme.h"
+#include "pci.h"
 
 #define NVME_Q_DEPTH		1024
 #define NVME_AQ_DEPTH		256
@@ -66,86 +67,18 @@ MODULE_PARM_DESC(use_cmb_sqes, "use controller's memory buffer for I/O SQes");
 
 static struct workqueue_struct *nvme_workq;
 
-struct nvme_dev;
 struct nvme_queue;
 
 static int nvme_reset(struct nvme_dev *dev);
 static void nvme_process_cq(struct nvme_queue *nvmeq);
 static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown);
 
-struct nvme_dev_ops {
-	int (*enable)(struct nvme_dev *dev);
-	void (*disable)(struct nvme_dev *dev);
-	int (*map_irq)(struct nvme_dev *dev, int nr_io_queues);
-	int (*q_irq)(struct nvme_queue *q);
-	int (*is_enabled)(struct nvme_dev *dev);
-	int (*is_offline)(struct nvme_dev *dev);
-	bool (*is_present)(struct nvme_dev *dev);
-};
-
-/*
- * Represents an NVM Express device.  Each nvme_dev is a PCI function.
- */
-struct nvme_dev {
-	struct nvme_queue **queues;
-	struct blk_mq_tag_set tagset;
-	struct blk_mq_tag_set admin_tagset;
-	u32 __iomem *dbs;
-	struct device *dev;
-	struct dma_pool *prp_page_pool;
-	struct dma_pool *prp_small_pool;
-	unsigned queue_count;
-	unsigned online_queues;
-	unsigned max_qid;
-	int q_depth;
-	u32 db_stride;
-	void __iomem *bar;
-	struct work_struct reset_work;
-	struct work_struct remove_work;
-	struct timer_list watchdog_timer;
-	struct mutex shutdown_lock;
-	bool subsystem;
-	void __iomem *cmb;
-	dma_addr_t cmb_dma_addr;
-	u64 cmb_size;
-	u32 cmbsz;
-	struct nvme_ctrl ctrl;
-	struct completion ioq_wait;
-	const struct resource *res;
-	const struct nvme_dev_ops *ops;
-};
-
 static inline struct nvme_dev *to_nvme_dev(struct nvme_ctrl *ctrl)
 {
 	return container_of(ctrl, struct nvme_dev, ctrl);
 }
 
 /*
- * An NVM Express queue.  Each device has at least two (one for admin
- * commands and one for I/O commands).
- */
-struct nvme_queue {
-	struct device *q_dmadev;
-	struct nvme_dev *dev;
-	char irqname[24];	/* nvme4294967295-65535\0 */
-	spinlock_t q_lock;
-	struct nvme_command *sq_cmds;
-	struct nvme_command __iomem *sq_cmds_io;
-	volatile struct nvme_completion *cqes;
-	struct blk_mq_tags **tags;
-	dma_addr_t sq_dma_addr;
-	dma_addr_t cq_dma_addr;
-	u32 __iomem *q_db;
-	u16 q_depth;
-	s16 cq_vector;
-	u16 sq_tail;
-	u16 cq_head;
-	u16 qid;
-	u8 cq_phase;
-	u8 cqe_seen;
-};
-
-/*
  * The nvme_iod describes the data in an I/O, including the list of PRP
  * entries.  You can't see it in this data structure because C doesn't let
  * me express that.  Use nvme_init_iod to ensure there's enough space
diff --git a/drivers/nvme/host/pci.h b/drivers/nvme/host/pci.h
new file mode 100644
index 000000000000..62b658abb886
--- /dev/null
+++ b/drivers/nvme/host/pci.h
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2011-2016, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef __NVME_PCI_H__
+#define __NVME_PCI_H__
+#include <linux/blk-mq.h>
+
+struct nvme_queue;
+struct nvme_dev;
+struct resource;
+struct device;
+
+struct nvme_dev_ops {
+	int (*enable)(struct nvme_dev *dev);
+	void (*disable)(struct nvme_dev *dev);
+	int (*map_irq)(struct nvme_dev *dev, int nr_io_queues);
+	int (*q_irq)(struct nvme_queue *q);
+	int (*is_enabled)(struct nvme_dev *dev);
+	int (*is_offline)(struct nvme_dev *dev);
+	bool (*is_present)(struct nvme_dev *dev);
+};
+
+/*
+ * Represents an NVM Express device.  Each nvme_dev is a PCI function.
+ */
+struct nvme_dev {
+	struct nvme_queue **queues;
+	struct blk_mq_tag_set tagset;
+	struct blk_mq_tag_set admin_tagset;
+	u32 __iomem *dbs;
+	struct device *dev;
+	struct dma_pool *prp_page_pool;
+	struct dma_pool *prp_small_pool;
+	unsigned queue_count;
+	unsigned online_queues;
+	unsigned max_qid;
+	int q_depth;
+	u32 db_stride;
+	void __iomem *bar;
+	struct work_struct reset_work;
+	struct work_struct remove_work;
+	struct timer_list watchdog_timer;
+	struct mutex shutdown_lock;
+	bool subsystem;
+	void __iomem *cmb;
+	dma_addr_t cmb_dma_addr;
+	u64 cmb_size;
+	u32 cmbsz;
+	struct nvme_ctrl ctrl;
+	struct completion ioq_wait;
+	const struct resource *res;
+	const struct nvme_dev_ops *ops;
+};
+
+/*
+ * An NVM Express queue.  Each device has at least two (one for admin
+ * commands and one for I/O commands).
+ */
+struct nvme_queue {
+	struct device *q_dmadev;
+	struct nvme_dev *dev;
+	char irqname[24];	/* nvme4294967295-65535\0 */
+	spinlock_t q_lock;
+	struct nvme_command *sq_cmds;
+	struct nvme_command __iomem *sq_cmds_io;
+	volatile struct nvme_completion *cqes;
+	struct blk_mq_tags **tags;
+	dma_addr_t sq_dma_addr;
+	dma_addr_t cq_dma_addr;
+	u32 __iomem *q_db;
+	u16 q_depth;
+	s16 cq_vector;
+	u16 sq_tail;
+	u16 cq_head;
+	u16 qid;
+	u8 cq_phase;
+	u8 cqe_seen;
+};
+#endif /* __NVME_PCI_H__ */


^ permalink raw reply related	[flat|nested] 40+ messages in thread

* [PATCH 5/5] nvme: ahci remap support
  2016-10-22  0:25 ` Dan Williams
@ 2016-10-22  0:25   ` Dan Williams
  -1 siblings, 0 replies; 40+ messages in thread
From: Dan Williams @ 2016-10-22  0:25 UTC (permalink / raw)
  To: tj; +Cc: keith.busch, linux-ide, hch, linux-nvme

Provide a platform driver for the nvme resources that may be hidden /
remapped behind an ahci bar. The implementation is the standard baseline
nvme driver with callouts to "ahci_nvme" specific routines to replace
"pci-express" functionality.

Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/ata/Kconfig        |    1 
 drivers/nvme/host/Makefile |    2 
 drivers/nvme/host/ahci.c   |  198 ++++++++++++++++++++++++++++++++++++++++++++
 drivers/nvme/host/pci.c    |   19 +++-
 drivers/nvme/host/pci.h    |    9 ++
 5 files changed, 222 insertions(+), 7 deletions(-)
 create mode 100644 drivers/nvme/host/ahci.c

diff --git a/drivers/ata/Kconfig b/drivers/ata/Kconfig
index b9e46f2c69c1..189adbcfe4d3 100644
--- a/drivers/ata/Kconfig
+++ b/drivers/ata/Kconfig
@@ -92,6 +92,7 @@ config SATA_AHCI
 
 config SATA_AHCI_NVME
 	tristate "AHCI: Remapped NVMe support"
+	default SATA_AHCI
 	depends on SATA_AHCI
 	depends on BLK_DEV_NVME
 	help
diff --git a/drivers/nvme/host/Makefile b/drivers/nvme/host/Makefile
index 47abcec23514..e665b6232a10 100644
--- a/drivers/nvme/host/Makefile
+++ b/drivers/nvme/host/Makefile
@@ -2,12 +2,14 @@ obj-$(CONFIG_NVME_CORE)			+= nvme-core.o
 obj-$(CONFIG_BLK_DEV_NVME)		+= nvme.o
 obj-$(CONFIG_NVME_FABRICS)		+= nvme-fabrics.o
 obj-$(CONFIG_NVME_RDMA)			+= nvme-rdma.o
+obj-$(CONFIG_SATA_AHCI_NVME)		+= ahci-nvme.o
 
 nvme-core-y				:= core.o
 nvme-core-$(CONFIG_BLK_DEV_NVME_SCSI)	+= scsi.o
 nvme-core-$(CONFIG_NVM)			+= lightnvm.o
 
 nvme-y					+= pci.o
+ahci-nvme-y				+= ahci.o
 
 nvme-fabrics-y				+= fabrics.o
 
diff --git a/drivers/nvme/host/ahci.c b/drivers/nvme/host/ahci.c
new file mode 100644
index 000000000000..ee2aaadc5213
--- /dev/null
+++ b/drivers/nvme/host/ahci.c
@@ -0,0 +1,198 @@
+/*
+ * Copyright (c) 2011-2016, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+#include <linux/platform_device.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/pm.h>
+#include "pci.h"
+
+struct ahci_nvme_data {
+	atomic_t enabled;
+};
+
+static struct ahci_nvme_data *to_ahci_nvme_data(struct nvme_dev *dev)
+{
+	return dev->dev->platform_data;
+}
+
+static int ahci_nvme_enable(struct nvme_dev *dev)
+{
+	int rc;
+	struct resource *res;
+	struct device *ddev = dev->dev;
+	struct device *parent = ddev->parent;
+	struct ahci_nvme_data *adata = to_ahci_nvme_data(dev);
+	struct platform_device *pdev = to_platform_device(ddev);
+
+	res = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
+	if (!res)
+		return -ENXIO;
+
+	/* parent ahci device determines the dma mask */
+	if (dma_supported(parent, DMA_BIT_MASK(64)))
+		rc = dma_coerce_mask_and_coherent(ddev, DMA_BIT_MASK(64));
+	else if (dma_supported(parent, DMA_BIT_MASK(32)))
+		rc = dma_coerce_mask_and_coherent(ddev, DMA_BIT_MASK(32));
+	else
+		rc = -ENXIO;
+	if (rc)
+		return rc;
+
+	rc = nvme_enable(dev);
+	if (rc)
+		return rc;
+
+	atomic_inc(&adata->enabled);
+
+	return 0;
+}
+
+static int ahci_nvme_is_enabled(struct nvme_dev *dev)
+{
+	struct ahci_nvme_data *adata = to_ahci_nvme_data(dev);
+
+	return atomic_read(&adata->enabled) > 0;
+}
+
+static void ahci_nvme_disable(struct nvme_dev *dev)
+{
+	struct ahci_nvme_data *adata = to_ahci_nvme_data(dev);
+
+	/*
+	 * bug compatible with nvme_pci_disable() which also has this
+	 * potential disable race
+	 */
+	if (ahci_nvme_is_enabled(dev))
+		atomic_dec(&adata->enabled);
+}
+
+static int ahci_nvme_is_offline(struct nvme_dev *dev)
+{
+	return 0;
+}
+
+static bool ahci_nvme_is_present(struct nvme_dev *dev)
+{
+	return true;
+}
+
+static int ahci_nvme_map_irq(struct nvme_dev *dev, int nr_io_queues)
+{
+	struct platform_device *pdev = to_platform_device(dev->dev);
+	struct nvme_queue *adminq = dev->queues[0];
+	struct resource *res;
+
+	res = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
+	if (!res)
+		return -ENXIO;
+
+	/* Deregister the admin queue's interrupt */
+	free_irq(res->start, adminq);
+
+	return min_t(int, resource_size(res), nr_io_queues);
+}
+
+static int ahci_nvme_q_irq(struct nvme_queue *nvmeq)
+{
+	struct resource *res;
+	struct nvme_dev *dev = nvmeq->dev;
+	struct platform_device *pdev = to_platform_device(dev->dev);
+
+	res = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
+	if (!res)
+		return -ENXIO;
+
+	if (resource_size(res) > nvmeq->qid)
+		return res->start + nvmeq->qid;
+	return res->start;
+}
+
+static const struct nvme_dev_ops ahci_nvme_dev_ops = {
+	.enable			= ahci_nvme_enable,
+	.disable		= ahci_nvme_disable,
+	.map_irq		= ahci_nvme_map_irq,
+	.q_irq			= ahci_nvme_q_irq,
+	.is_enabled		= ahci_nvme_is_enabled,
+	.is_offline		= ahci_nvme_is_offline,
+	.is_present		= ahci_nvme_is_present,
+};
+
+static void ahci_nvme_shutdown(struct platform_device *pdev)
+{
+	struct nvme_dev *dev = platform_get_drvdata(pdev);
+
+	nvme_dev_disable(dev, true);
+}
+
+static int ahci_nvme_remove(struct platform_device *pdev)
+{
+	nvme_remove(&pdev->dev);
+	pdev->dev.platform_data = NULL;
+	return 0;
+}
+
+static struct platform_device_id ahci_nvme_id_table[] = {
+	{ .name = "ahci_nvme", },
+	{},
+};
+
+static int ahci_nvme_probe(struct platform_device *pdev)
+{
+	struct device *ddev = &pdev->dev;
+	struct ahci_nvme_data *adata;
+	struct resource *res;
+
+	adata = devm_kzalloc(ddev, sizeof(*adata), GFP_KERNEL);
+	if (!adata)
+		return -ENOMEM;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!res)
+		return -ENXIO;
+
+	if (!devm_request_mem_region(ddev, res->start, resource_size(res),
+				dev_name(ddev)))
+		return -EBUSY;
+
+	ddev->platform_data = adata;
+	return nvme_probe(ddev, res, &ahci_nvme_dev_ops, 0);
+}
+
+static SIMPLE_DEV_PM_OPS(ahci_nvme_dev_pm_ops, nvme_suspend, nvme_resume);
+
+static struct platform_driver ahci_nvme_driver = {
+	.driver		= {
+		.name	= "ahci_nvme",
+		.pm     = &ahci_nvme_dev_pm_ops,
+	},
+	.id_table	= ahci_nvme_id_table,
+	.probe		= ahci_nvme_probe,
+	.remove		= ahci_nvme_remove,
+	.shutdown	= ahci_nvme_shutdown,
+};
+
+static __init int ahci_nvme_init(void)
+{
+	return platform_driver_register(&ahci_nvme_driver);
+}
+
+static __exit void ahci_nvme_exit(void)
+{
+	platform_driver_unregister(&ahci_nvme_driver);
+}
+
+MODULE_DEVICE_TABLE(platform, ahci_nvme_id_table);
+module_init(ahci_nvme_init);
+module_exit(ahci_nvme_exit);
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Intel Corporation");
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 418ccc1c0cf7..2814caba9e2e 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -71,7 +71,6 @@ struct nvme_queue;
 
 static int nvme_reset(struct nvme_dev *dev);
 static void nvme_process_cq(struct nvme_queue *nvmeq);
-static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown);
 
 static inline struct nvme_dev *to_nvme_dev(struct nvme_ctrl *ctrl)
 {
@@ -1525,7 +1524,7 @@ static int nvme_dev_add(struct nvme_dev *dev)
 	return 0;
 }
 
-static int nvme_enable(struct nvme_dev *dev)
+int nvme_enable(struct nvme_dev *dev)
 {
 	u64 cap;
 
@@ -1540,6 +1539,7 @@ static int nvme_enable(struct nvme_dev *dev)
 
 	return 0;
 }
+EXPORT_SYMBOL_GPL(nvme_enable);
 
 static int nvme_pci_enable(struct nvme_dev *dev)
 {
@@ -1618,7 +1618,7 @@ static bool nvme_pci_is_present(struct nvme_dev *dev)
 	return pci_device_is_present(to_pci_dev(dev->dev));
 }
 
-static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
+void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
 {
 	int i;
 	u32 csts = -1;
@@ -1651,6 +1651,7 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
 	blk_mq_tagset_busy_iter(&dev->admin_tagset, nvme_cancel_request, &dev->ctrl);
 	mutex_unlock(&dev->shutdown_lock);
 }
+EXPORT_SYMBOL_GPL(nvme_dev_disable);
 
 static int nvme_setup_prp_pools(struct nvme_dev *dev)
 {
@@ -1841,7 +1842,7 @@ static const struct nvme_dev_ops nvme_pci_dev_ops = {
 	.is_present		= nvme_pci_is_present,
 };
 
-static int nvme_probe(struct device *ddev, struct resource *res,
+int nvme_probe(struct device *ddev, struct resource *res,
 		const struct nvme_dev_ops *ops, unsigned long quirks)
 {
 	int node, result = -ENOMEM;
@@ -1910,6 +1911,7 @@ static int nvme_probe(struct device *ddev, struct resource *res,
 	kfree(dev);
 	return result;
 }
+EXPORT_SYMBOL_GPL(nvme_probe);
 
 static void nvme_pci_release_regions(void *data)
 {
@@ -1957,7 +1959,7 @@ static void nvme_pci_shutdown(struct pci_dev *pdev)
  * state. This function must not have any dependencies on the device state in
  * order to proceed.
  */
-static void nvme_remove(struct device *ddev)
+void nvme_remove(struct device *ddev)
 {
 	struct nvme_dev *dev = dev_get_drvdata(ddev);
 
@@ -1977,6 +1979,7 @@ static void nvme_remove(struct device *ddev)
 	nvme_release_prp_pools(dev);
 	nvme_put_ctrl(&dev->ctrl);
 }
+EXPORT_SYMBOL_GPL(nvme_remove);
 
 static void nvme_pci_remove(struct pci_dev *pdev)
 {
@@ -2002,21 +2005,23 @@ static int nvme_pci_sriov_configure(struct pci_dev *pdev, int numvfs)
 }
 
 #ifdef CONFIG_PM_SLEEP
-static int nvme_suspend(struct device *dev)
+int nvme_suspend(struct device *dev)
 {
 	struct nvme_dev *ndev = dev_get_drvdata(dev);
 
 	nvme_dev_disable(ndev, true);
 	return 0;
 }
+EXPORT_SYMBOL_GPL(nvme_suspend);
 
-static int nvme_resume(struct device *dev)
+int nvme_resume(struct device *dev)
 {
 	struct nvme_dev *ndev = dev_get_drvdata(dev);
 
 	queue_work(nvme_workq, &ndev->reset_work);
 	return 0;
 }
+EXPORT_SYMBOL_GPL(nvme_resume);
 #endif
 
 static SIMPLE_DEV_PM_OPS(nvme_dev_pm_ops, nvme_suspend, nvme_resume);
diff --git a/drivers/nvme/host/pci.h b/drivers/nvme/host/pci.h
index 62b658abb886..f1a3e928ed80 100644
--- a/drivers/nvme/host/pci.h
+++ b/drivers/nvme/host/pci.h
@@ -14,6 +14,7 @@
 #ifndef __NVME_PCI_H__
 #define __NVME_PCI_H__
 #include <linux/blk-mq.h>
+#include "nvme.h"
 
 struct nvme_queue;
 struct nvme_dev;
@@ -86,4 +87,12 @@ struct nvme_queue {
 	u8 cq_phase;
 	u8 cqe_seen;
 };
+
+int nvme_probe(struct device *ddev, struct resource *res,
+		const struct nvme_dev_ops *ops, unsigned long quirks);
+void nvme_remove(struct device *ddev);
+int nvme_enable(struct nvme_dev *dev);
+void nvme_dev_disable(struct nvme_dev *dev, bool shutdown);
+int nvme_suspend(struct device *dev);
+int nvme_resume(struct device *dev);
 #endif /* __NVME_PCI_H__ */


^ permalink raw reply related	[flat|nested] 40+ messages in thread

* Re: [PATCH 0/5] ahci: nvme remap support
  2016-10-22  0:25 ` Dan Williams
@ 2016-10-22  6:50   ` Christoph Hellwig
  -1 siblings, 0 replies; 40+ messages in thread
From: Christoph Hellwig @ 2016-10-22  6:50 UTC (permalink / raw)
  To: Dan Williams; +Cc: tj, keith.busch, linux-ide, hch, linux-nvme

On Fri, Oct 21, 2016 at 05:25:21PM -0700, Dan Williams wrote:
> Some Intel ahci implementations have the capability to expose another
> pci-express device's memory resources through an ahci memory bar.  Add
> the enabling to detect these configurations and register the resources
> for the nvme driver to consume. Otherwise, the nvme device is
> effectively hidden from the kernel for this configuration.

Honestly I think this is just too ugly to live. I think people getting
tricked by Intel into buying this piece of junk will just have to live
with AHCI mode.

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 0/5] ahci: nvme remap support
  2016-10-22  6:50   ` Christoph Hellwig
@ 2016-10-22 19:26     ` Dan Williams
  -1 siblings, 0 replies; 40+ messages in thread
From: Dan Williams @ 2016-10-22 19:26 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Tejun Heo, Keith Busch, IDE/ATA development list, linux-nvme

On Fri, Oct 21, 2016 at 11:50 PM, Christoph Hellwig <hch@lst.de> wrote:
> On Fri, Oct 21, 2016 at 05:25:21PM -0700, Dan Williams wrote:
>> Some Intel ahci implementations have the capability to expose another
>> pci-express device's memory resources through an ahci memory bar.  Add
>> the enabling to detect these configurations and register the resources
>> for the nvme driver to consume. Otherwise, the nvme device is
>> effectively hidden from the kernel for this configuration.
>
> Honestly I think this is just too ugly to live. I think people getting
> tricked by Intel into buying this piece of junk will just have to live
> with AHCI mode.

Objection noted and, frankly, expected. But no, this is Linux, we as a
community do our best to not strand users with hardware they have in
hand. So, please help me review these patches with an eye towards
minimizing the ongoing maintenance burden to ahci and nvme.

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 0/5] ahci: nvme remap support
  2016-10-22 19:26     ` Dan Williams
@ 2016-10-23  8:34       ` Christoph Hellwig
  -1 siblings, 0 replies; 40+ messages in thread
From: Christoph Hellwig @ 2016-10-23  8:34 UTC (permalink / raw)
  To: Dan Williams
  Cc: Christoph Hellwig, Tejun Heo, Keith Busch,
	IDE/ATA development list, linux-nvme

On Sat, Oct 22, 2016 at 12:26:54PM -0700, Dan Williams wrote:
> Objection noted and, frankly, expected. But no, this is Linux, we as a
> community do our best to not strand users with hardware they have in
> hand. So, please help me review these patches with an eye towards
> minimizing the ongoing maintenance burden to ahci and nvme.

It's just a major burden on NVMe.  What is the reset behavior of
your franken devices, for example?  Please take it to the NVMe technical
working group first so that we can come up with a coherent theory
of operations.

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 0/5] ahci: nvme remap support
  2016-10-23  8:34       ` Christoph Hellwig
@ 2016-10-23 13:57         ` Dan Williams
  -1 siblings, 0 replies; 40+ messages in thread
From: Dan Williams @ 2016-10-23 13:57 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Tejun Heo, Keith Busch, IDE/ATA development list, linux-nvme

On Sun, Oct 23, 2016 at 1:34 AM, Christoph Hellwig <hch@lst.de> wrote:
> It's just a major burden on NVMe.  What is the reset behavior of
> your franken devices for example?  Pleas take it to the NVMe technical
> working group first so that we can come up with a coherent theory
> of operations.

I should clarify that these are not new devices for the NVMe technical
working group to consider. They are discrete / typical / off-the-shelf
NVMe devices from any vendor.  It's just the memory bar and interrupt
vector that are arranged to be shared with an ahci pci device.
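
For a concrete picture of the arrangement, here is a simplified sketch of
the ahci side (the offsets and sizes are made up for illustration; patch 1
has the real remap discovery logic):

#include <linux/pci.h>
#include <linux/ioport.h>
#include <linux/platform_device.h>
#include <linux/err.h>

/*
 * Illustrative sketch only: publish one remapped NVMe region carved out
 * of the AHCI memory BAR as an "ahci_nvme" platform device, reusing the
 * ahci function's interrupt vector.  The 0x10000/0x4000 values below are
 * placeholders, not the actual remap layout.
 */
static int ahci_remap_register_nvme(struct pci_dev *pdev, int bar, int irq)
{
	struct resource res[2] = {
		DEFINE_RES_MEM(pci_resource_start(pdev, bar) + 0x10000, 0x4000),
		DEFINE_RES_IRQ(irq),
	};
	struct platform_device *nvme_pdev;

	nvme_pdev = platform_device_register_resndata(&pdev->dev, "ahci_nvme",
						      -1, res, 2, NULL, 0);
	return PTR_ERR_OR_ZERO(nvme_pdev);
}

The platform driver later in the series then picks those two resources
back up via platform_get_resource().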

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 0/5] ahci: nvme remap support
  2016-10-23 13:57         ` Dan Williams
@ 2016-10-24 12:49           ` Christoph Hellwig
  -1 siblings, 0 replies; 40+ messages in thread
From: Christoph Hellwig @ 2016-10-24 12:49 UTC (permalink / raw)
  To: Dan Williams
  Cc: Christoph Hellwig, Tejun Heo, Keith Busch,
	IDE/ATA development list, linux-nvme

On Sun, Oct 23, 2016 at 06:57:41AM -0700, Dan Williams wrote:
> I should clarify that these are not new devices for the NVMe technical
> working group to consider. They are discrete / typical / off-the-shelf
> NVMe devices from any vendor.  It's just the memory bar and interrupt
> vector that are arranged to be shared with an ahci pci device.

But this has a profound effect on the NVMe operation, because for
example the NVMe reset cycle is tied into PCIe function states.

Please bring this issue up with the relevant standards committee first,
otherwise we're getting ourselves into a nightmare of undefined behavior here.

And it's not like Intel isn't active in this group.  I'd suggest you
talk to Amber, who is the editor for both the AHCI and NVMe specs;
that should get you started.

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 0/5] ahci: nvme remap support
  2016-10-24 12:49           ` Christoph Hellwig
@ 2016-10-24 14:46             ` Keith Busch
  -1 siblings, 0 replies; 40+ messages in thread
From: Keith Busch @ 2016-10-24 14:46 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Dan Williams, Tejun Heo, IDE/ATA development list, linux-nvme

On Mon, Oct 24, 2016 at 02:49:38PM +0200, Christoph Hellwig wrote:
> But this has a profound effect on the NVMe operation, because fo
> example the NVMe reset cycle is tied into PCIe function states.
> 
> Please bring this issue up with the relevant standards comittee first,
> otherwise we're getting us into a nightmare of undefined behavior here.
> 
> And it's not like Intel isn't active in this group.  I'd suggest you
> talk to Amber who is the editor for both the AHCI and NVMe spec,
> that should get you started.

Amber is aware of this and was supportive in having Intel open the
specs to enable this hardware.

The nvme driver has weird hooks to support the non-standard open channel
effort, and we let Apple dictate this driver can't have q-word access.
This remapping isn't exactly the first time we're helping non-standard
devices, and Dan's series looks isolated such that it won't harm
compliant ones.

Dan,
I take it you have access to this platform. Would you be able to verify whether
it can successfully resume from S3 and S4?

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 0/5] ahci: nvme remap support
  2016-10-24 14:46             ` Keith Busch
@ 2016-10-24 15:16               ` Christoph Hellwig
  -1 siblings, 0 replies; 40+ messages in thread
From: Christoph Hellwig @ 2016-10-24 15:16 UTC (permalink / raw)
  To: Keith Busch
  Cc: Christoph Hellwig, Dan Williams, Tejun Heo,
	IDE/ATA development list, linux-nvme

On Mon, Oct 24, 2016 at 10:46:29AM -0400, Keith Busch wrote:
> Amber is aware of this and was supportive in having Intel open the
> specs to enable this hardware.

Ok, let's get the spec out first.  Do we expect to be able to use the
AHCI interface and the NVMe interface at the same time?  If the first
is the case I think we are royally screwed and I see no good way to
support this beast.  If it's the latter let's keep AHCI entirely out
of the game - add the affected PCI IDs to the NVMe driver itself, add
a quirk for them and implement the enable sequence inside the NVMe
driver.
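
Purely to illustrate the shape of that alternative -- the device ID and
the quirk flag below are placeholders, nothing that exists in the driver
today:

/* hypothetical entry; 0xabcd and NVME_QUIRK_AHCI_REMAP are made up */
static const struct pci_device_id nvme_id_table[] = {
	/* ... existing entries ... */
	{ PCI_VDEVICE(INTEL, 0xabcd),	/* remapped-behind-AHCI config */
		.driver_data = NVME_QUIRK_AHCI_REMAP, },
	{ 0, }
};

nvme_probe() would then key the alternate enable sequence off that quirk
bit instead of pulling a second driver into the picture.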

> The nvme driver has weird hooks to support the non-standard open channel
> effort, and we let Apple dictate this driver can't have q-word access.
> This remapping isn't exactly the first time we're helping non-standard
> devices, and Dan's series looks isolated such that it won't harm
> compliant ones.

But it's the worst kind.  Open Channel is a bit of an oddball, but it
sits on top of the transport; the rest are minor workarounds conditional
on PCI IDs.

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 0/5] ahci: nvme remap support
  2016-10-24 15:16               ` Christoph Hellwig
@ 2016-10-24 17:46                 ` Dan Williams
  -1 siblings, 0 replies; 40+ messages in thread
From: Dan Williams @ 2016-10-24 17:46 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Keith Busch, Tejun Heo, IDE/ATA development list, linux-nvme

On Mon, Oct 24, 2016 at 8:16 AM, Christoph Hellwig <hch@lst.de> wrote:
> On Mon, Oct 24, 2016 at 10:46:29AM -0400, Keith Busch wrote:
>> Amber is aware of this and was supportive in having Intel open the
>> specs to enable this hardware.
>
> Ok, let's get the spec out first.

The patch contents are it.

> Do we expect to be able to use the
> AHCI interface and the NVMe interface at the same time?

Yes, simultaneous access.

> If the first is the case I think we are royally screwed and I see no good way to
> support this beast.

I'm new to the NVMe driver and spec so I'll tread carefully here...

Resets do present a problem, especially since they are specified to
reset pci registers that we do not have access to read/write.

That said, the driver seems to already comprehend instances where the
device does not support nvme_reset_subsystem() requests. I don't know
how often those resets need to be issued in practice.
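
The guard I am referring to, paraphrased rather than quoted verbatim from
core.c -- a subsystem reset is simply refused when the controller never
advertised CAP.NSSRS:

/* paraphrase of the existing check, not an exact copy of core.c */
static int nvme_reset_subsystem(struct nvme_ctrl *ctrl)
{
	if (!ctrl->subsystem)	/* CAP.NSSRS was not set at init time */
		return -ENOTTY;
	/* "NVMe" in ASCII, written to the NSSR register */
	return ctrl->ops->reg_write32(ctrl, NVME_REG_NSSR, 0x4E564D65);
}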

> If it's the latter let's keep AHCI entirely out
> of the game - add the affected PCI IDs to the NVMe driver itself, add
> a quirk for them and implement the enable sequence inside the NVMe
> driver.

The PCI ID of the AHCI device is not uniquely identifiable when in this mode.

We could flip the arrangement and have the ahci device as the platform
device, but I'm not sure this makes the nvme reset problem much
better.  If we allow subsystem resets at all they would still need to
be coordinated across 2 devices/drivers to reinitialize pci registers.

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 0/5] ahci: nvme remap support
  2016-10-24 17:46                 ` Dan Williams
@ 2016-10-24 17:55                   ` Christoph Hellwig
  -1 siblings, 0 replies; 40+ messages in thread
From: Christoph Hellwig @ 2016-10-24 17:55 UTC (permalink / raw)
  To: Dan Williams
  Cc: Christoph Hellwig, Keith Busch, Tejun Heo,
	IDE/ATA development list, linux-nvme

On Mon, Oct 24, 2016 at 10:46:29AM -0700, Dan Williams wrote:
> On Mon, Oct 24, 2016 at 8:16 AM, Christoph Hellwig <hch@lst.de> wrote:
> > On Mon, Oct 24, 2016 at 10:46:29AM -0400, Keith Busch wrote:
> >> Amber is aware of this and was supportive in having Intel open the
> >> specs to enable this hardware.
> >
> > Ok, let's get the spec out first.
> 
> The patch contents are it.

Well, that's simply not acceptable.  We will need a theory of
operation for this device to support it, because the patch as-is
simply doesn't make sense.

We'll also need to know where such a device might show up, and why
Linux support for it matters.

> > Do we expect to be able to use the
> > AHCI interface and the NVMe interface at the same time?
> 
> Yes, simultaneous access.

Yikes.  I'm really tempted to say this is not acceptable - Intel
really must know better.

> That said, the driver seems to already comprehend instances where the
> device does not support nvme_reset_subsystem() requests. I don't know
> how often those resets need to be issued in practice.

It's not about how often resets are needed (and there are controller
resets in addition to function-level resets), but how they are
defined to work.  What state is a controller in after a host-initiated
reset, after a PCIe hotplug or even warm plug?  What state is the
PCI device in when the controller is in a failed state?

> > If it's the latter let's keep AHCI entirely out
> > of the game - add the affected PCI IDs to the NVMe driver itself, add
> > a quirk for them and implement the enable sequence inside the NVMe
> > driver.
> 
> The PCI ID of the AHCI device is not uniquely identifiable when in this mode.
> 
> We could flip the arrangement and have the ahci device as the platform
> device, but I'm not sure this makes the nvme reset problem much
> better.  If we allow subsystem resets at all they would still need to
> be coordinated across 2 devices/drivers to reinitialize pci registers.

I think the simple answer is to not support this device.  It's not like
Intel doesn't know the AHCI and NVMe specs or had any reason to come
up with a piece of junk like this.

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 0/5] ahci: nvme remap support
  2016-10-24 17:46                 ` Dan Williams
@ 2016-10-24 17:57                   ` Dan Williams
  -1 siblings, 0 replies; 40+ messages in thread
From: Dan Williams @ 2016-10-24 17:57 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Keith Busch, Tejun Heo, IDE/ATA development list, linux-nvme

On Mon, Oct 24, 2016 at 10:46 AM, Dan Williams <dan.j.williams@intel.com> wrote:
> On Mon, Oct 24, 2016 at 8:16 AM, Christoph Hellwig <hch@lst.de> wrote:
[..]
>> If it's the latter let's keep AHCI entirely out
>> of the game - add the affected PCI IDs to the NVMe driver itself, add
>> a quirk for them and implement the enable sequence inside the NVMe
>> driver.
>
> The PCI ID of the AHCI device is not uniquely identifiable when in this mode.
>
> We could flip the arrangement and have the ahci device as the platform
> device, but I'm not sure this makes the nvme reset problem much
> better.  If we allow subsystem resets at all they would still need to
> be coordinated across 2 devices/drivers to reinitialize pci registers.

...but it still might be the better way to go. I.e. push the hacks to
the legacy driver and not burden nvme with this new "nvme_dev_ops"
concept.
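
To make the tradeoff concrete, the indirection being debated has roughly the
following shape (the field names are illustrative guesses at what a native
pci-express backend vs. an ahci-remapped backend would need to override, not
the literal contents of patch 3/5):

struct nvme_dev_ops {
	/*
	 * pci_enable_device() + BAR0 + MSI-X in the native case; the
	 * remapped AHCI window + a shared legacy interrupt otherwise.
	 */
	int	(*enable)(struct nvme_dev *dev);
	void	(*disable)(struct nvme_dev *dev);
	int	(*map_registers)(struct nvme_dev *dev);
	int	(*setup_irqs)(struct nvme_dev *dev, int nr_vecs);
	bool	(*is_present)(struct nvme_dev *dev);
};

Every one of those hooks is a point where the remapped case diverges, which
is exactly the burden on the nvme core being objected to.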

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 0/5] ahci: nvme remap support
  2016-10-24 17:57                   ` Dan Williams
@ 2016-10-24 18:21                     ` Christoph Hellwig
  -1 siblings, 0 replies; 40+ messages in thread
From: Christoph Hellwig @ 2016-10-24 18:21 UTC (permalink / raw)
  To: Dan Williams
  Cc: Christoph Hellwig, Keith Busch, Tejun Heo,
	IDE/ATA development list, linux-nvme

On Mon, Oct 24, 2016 at 10:57:16AM -0700, Dan Williams wrote:
> ...but it still might be the better way to go. I.e. push the hacks to
> the legacy driver and not burden nvme with this new "nvme_dev_ops"
> concept.

Let's get some coherent text out first on how this device even
operates, and then we can decide if and how we can support it.

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 0/5] ahci: nvme remap support
  2016-10-24 17:55                   ` Christoph Hellwig
@ 2016-10-24 21:01                     ` Dan Williams
  -1 siblings, 0 replies; 40+ messages in thread
From: Dan Williams @ 2016-10-24 21:01 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Keith Busch, Tejun Heo, IDE/ATA development list, linux-nvme

On Mon, Oct 24, 2016 at 10:55 AM, Christoph Hellwig <hch@lst.de> wrote:
> On Mon, Oct 24, 2016 at 10:46:29AM -0700, Dan Williams wrote:
>> On Mon, Oct 24, 2016 at 8:16 AM, Christoph Hellwig <hch@lst.de> wrote:
>> > On Mon, Oct 24, 2016 at 10:46:29AM -0400, Keith Busch wrote:
>> >> Amber is aware of this and was supportive in having Intel open the
>> >> specs to enable this hardware.
>> >
>> > Ok, let's get the spec out first.
>>
>> The patch contents are it.
>
> Well, that's simply not acceptable.  We will need a theory of
> operation for this device to support it, because the patch as-is
> simply doesn't make sense.

The presence of ahci bar remapping is documented here:

http://www.intel.com/content/www/us/en/chipsets/100-series-chipset-datasheet-vol-2.html

The PCI memory space is remapped, the configuration space of the
remapped device is not available, and interrupts are shared.
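
As a rough sketch of what the ahci side does with that window (register
offsets and the platform device name below are assumptions distilled from
the datasheet wording, not the literal code in patch 1/5): check the
vendor-specific capability for an active remap, carve the NVMe register
window out of the AHCI BAR, and publish it as a platform resource, since
there is no config space left to enumerate:

/* placeholder offsets; the real values come from the datasheet */
#define AHCI_VSCAP		0xa4	/* vendor capability: remap active */
#define AHCI_REMAP_CAP		0x800	/* which remap slots are populated */
#define AHCI_REMAP_BASE		0x4000	/* first remapped register window */
#define AHCI_REMAP_SIZE		0x4000	/* size of each remapped window */

static int ahci_register_remapped_nvme(struct pci_dev *pdev, void __iomem *mmio)
{
	resource_size_t ahci_start = pci_resource_start(pdev, AHCI_PCI_BAR_STANDARD);
	struct resource res = DEFINE_RES_MEM(ahci_start + AHCI_REMAP_BASE,
					     AHCI_REMAP_SIZE);
	struct platform_device *child;

	if (!(readl(mmio + AHCI_VSCAP) & 1))
		return 0;		/* remapping not enabled by firmware */
	if (!(readl(mmio + AHCI_REMAP_CAP) & 1))
		return 0;		/* nothing behind remap slot 0 */

	/* no config space and no MSI of its own: the child shares our IRQ */
	child = platform_device_register_simple("nvme-ahci", 0, &res, 1);

	return PTR_ERR_OR_ZERO(child);
}

A real version also has to hand the shared pdev->irq to the child and cope
with the write-once configuration, but the resource handoff above is the
core of it.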

> We'll also need to know where such a device might show up, and why
> Linux support for it matters.

This is a capability of the "Intel® 100 Series Chipset Family".  These
patches matter because this chipset in this configuration appears in
laptops that you can buy today and Linux is unable to access the NVMe
device.

>> > Do we expect to be able to use the
>> > AHCI interface and the NVMe interface at the same time?
>>
>> Yes, simultaneous access.
>
> Yikes.  I'm really tempted to say this is not acceptable - Intel
> really must know better.
>

Linux users are unable to access storage without these patches.

>> That said, the driver seems to already comprehend instances where the
>> device does not support nvme_reset_subsystem() requests. I don't know
>> how often those resets need to be issued in practice.
>
> It's not about how often resets are needed (and there are controller
> resets in addition to function-level resets), but how they are
> defined to work.  What state is a controller in after a host-initiated
> reset, after a PCIe hotplug or even a warm plug?  What state is the
> PCI device in when the controller is in a failed state?

Talking with Keith, it sounds like subsystem resets are a feature of
enterprise-class NVMe devices.  I think those features are out of scope
for the class of devices that will find themselves in a platform with
this configuration; the same goes for hot-plug.

>> > If it's the latter let's keep AHCI entirely out
>> > of the game - add the affected PCI IDs to the NVMe driver itself, add
>> > a quirk for them and implement the enable sequence inside the NVMe
>> > driver.
>>
>> The PCI ID of the AHCI device is not uniquely identifiable when in this mode.
>>
>> We could flip the arrangement and have the ahci device as the platform
>> device, but I'm not sure this makes the nvme reset problem much
>> better.  If we allow subsystem resets at all they would still need to
>> be coordinated across 2 devices/drivers to reinitialize pci registers.
>
> I think the simple answer is to not support this device.  It's not like
> Intel doesn't know the AHCI and NVMe specs or had any reason to come
> up with a piece of junk like this.

Whatever answer we, in the Linux kernel community, come up with,
leaving storage inaccessible is not an acceptable answer.

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 0/5] ahci: nvme remap support
  2016-10-22  0:25 ` Dan Williams
@ 2016-11-15 18:52   ` Christoph Hellwig
  -1 siblings, 0 replies; 40+ messages in thread
From: Christoph Hellwig @ 2016-11-15 18:52 UTC (permalink / raw)
  To: Dan Williams; +Cc: tj, keith.busch, linux-ide, linux-nvme

As it's been a few weeks since this initial posting:  What's the
status of either getting the magic chipset handshake mode published
to take the chipset out of this degraded mode, or implementing a bridge
driver like VMD?

It seems devices supporting this "slow down the devices and make life
hell for the OS" mode are getting more common, so we'll have to do
something about it.

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 0/5] ahci: nvme remap support
  2016-11-15 18:52   ` Christoph Hellwig
@ 2016-11-19  6:12     ` Dan Williams
  -1 siblings, 0 replies; 40+ messages in thread
From: Dan Williams @ 2016-11-19  6:12 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Tejun Heo, Keith Busch, IDE/ATA development list, linux-nvme

On Tue, Nov 15, 2016 at 10:52 AM, Christoph Hellwig <hch@lst.de> wrote:
> As it's been a few weeks since this initial posting:  What's the
> status of either getting the magic chipset handshake mode published
> to take the chipset out of this degraded mode, or implementing a bridge
> driver like VMD?

The setting is done via write-once registers (once per reset), so
there is no mechanism for an OS to undo it.

> It seems devices supporting this "slow down the devices and make life
> hell for the OS" mode are getting more common, so we'll have to do
> something about it.

To my knowledge, all platforms that ship this configuration already
ship a BIOS, or have an updated BIOS available, that allows the
platform to be placed into AHCI mode.

Setting the controller to AHCI mode is the best option for running
Linux on these systems.

^ permalink raw reply	[flat|nested] 40+ messages in thread

end of thread, other threads:[~2016-11-19  6:12 UTC | newest]

Thread overview: 40+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-10-22  0:25 [PATCH 0/5] ahci: nvme remap support Dan Williams
2016-10-22  0:25 ` Dan Williams
2016-10-22  0:25 ` [PATCH 1/5] " Dan Williams
2016-10-22  0:25   ` Dan Williams
2016-10-22  0:25 ` [PATCH 2/5] nvme: rename "pci" operations to "mmio" Dan Williams
2016-10-22  0:25   ` Dan Williams
2016-10-22  0:25 ` [PATCH 3/5] nvme: introduce nvme_dev_ops Dan Williams
2016-10-22  0:25   ` Dan Williams
2016-10-22  0:25 ` [PATCH 4/5] nvme: move common definitions to pci.h Dan Williams
2016-10-22  0:25   ` Dan Williams
2016-10-22  0:25 ` [PATCH 5/5] nvme: ahci remap support Dan Williams
2016-10-22  0:25   ` Dan Williams
2016-10-22  6:50 ` [PATCH 0/5] ahci: nvme " Christoph Hellwig
2016-10-22  6:50   ` Christoph Hellwig
2016-10-22 19:26   ` Dan Williams
2016-10-22 19:26     ` Dan Williams
2016-10-23  8:34     ` Christoph Hellwig
2016-10-23  8:34       ` Christoph Hellwig
2016-10-23 13:57       ` Dan Williams
2016-10-23 13:57         ` Dan Williams
2016-10-24 12:49         ` Christoph Hellwig
2016-10-24 12:49           ` Christoph Hellwig
2016-10-24 14:46           ` Keith Busch
2016-10-24 14:46             ` Keith Busch
2016-10-24 15:16             ` Christoph Hellwig
2016-10-24 15:16               ` Christoph Hellwig
2016-10-24 17:46               ` Dan Williams
2016-10-24 17:46                 ` Dan Williams
2016-10-24 17:55                 ` Christoph Hellwig
2016-10-24 17:55                   ` Christoph Hellwig
2016-10-24 21:01                   ` Dan Williams
2016-10-24 21:01                     ` Dan Williams
2016-10-24 17:57                 ` Dan Williams
2016-10-24 17:57                   ` Dan Williams
2016-10-24 18:21                   ` Christoph Hellwig
2016-10-24 18:21                     ` Christoph Hellwig
2016-11-15 18:52 ` Christoph Hellwig
2016-11-15 18:52   ` Christoph Hellwig
2016-11-19  6:12   ` Dan Williams
2016-11-19  6:12     ` Dan Williams
