* [PATCH v2] nvme/pci: remap BAR0 to cover admin CQ doorbell for large stride
@ 2017-05-22 5:09 Xu Yu
2017-05-23 7:24 ` Christoph Hellwig
0 siblings, 1 reply; 2+ messages in thread
From: Xu Yu @ 2017-05-22 5:09 UTC (permalink / raw)
To: linux-nvme
Cc: linux-kernel, keith.busch, axboe, hch, sagi, haozhong.zhang, yu.a.xu
The existing driver initially maps 8192 bytes of BAR0 which is
intended to cover doorbells of admin SQ and CQ. However, if a
large stride, e.g. 10, is used, the doorbell of admin CQ will
be out of 8192 bytes. Consequently, a page fault will be raised
when the admin CQ doorbell is accessed in nvme_configure_admin_queue().
This patch fixes this issue by remapping BAR0 before accessing
admin CQ doorbell if the initial mapping is not enough.
Signed-off-by: Xu Yu <yu.a.xu@intel.com>
---
Changes since v1:
* Move the bar (re)mapping logic in nvme_dev_map(), nvme_configure_admin_queue()
and nvme_setup_io_queues() to a new helper nvme_remap_bar().
* Replace several magic numbers by PAGE_SIZE and the new NVME_REG_DBS.
---
drivers/nvme/host/pci.c | 63 ++++++++++++++++++++++++++++++++-----------------
include/linux/nvme.h | 1 +
2 files changed, 42 insertions(+), 22 deletions(-)
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 9d4640a..84a254a 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -91,6 +91,7 @@ struct nvme_dev {
int q_depth;
u32 db_stride;
void __iomem *bar;
+ unsigned long bar_mapped_size;
struct work_struct reset_work;
struct work_struct remove_work;
struct timer_list watchdog_timer;
@@ -1316,6 +1317,30 @@ static int nvme_alloc_admin_tags(struct nvme_dev *dev)
return 0;
}
+static unsigned long db_bar_size(struct nvme_dev *dev, unsigned nr_io_queues)
+{
+ return NVME_REG_DBS + ((nr_io_queues + 1) * 8 * dev->db_stride);
+}
+
+static int nvme_remap_bar(struct nvme_dev *dev, unsigned long size)
+{
+ struct pci_dev *pdev = to_pci_dev(dev->dev);
+
+ if (size <= dev->bar_mapped_size)
+ return 0;
+ if (dev->bar)
+ iounmap(dev->bar);
+ dev->bar = ioremap(pci_resource_start(pdev, 0), size);
+ if (!dev->bar) {
+ dev->bar_mapped_size = 0;
+ return -ENOMEM;
+ }
+ dev->bar_mapped_size = size;
+ dev->dbs = dev->bar + NVME_REG_DBS;
+
+ return 0;
+}
+
static int nvme_configure_admin_queue(struct nvme_dev *dev)
{
int result;
@@ -1323,6 +1348,10 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev)
u64 cap = lo_hi_readq(dev->bar + NVME_REG_CAP);
struct nvme_queue *nvmeq;
+ result = nvme_remap_bar(dev, db_bar_size(dev, 0));
+ if (result < 0)
+ return result;
+
dev->subsystem = readl(dev->bar + NVME_REG_VS) >= NVME_VS(1, 1, 0) ?
NVME_CAP_NSSRC(cap) : 0;
@@ -1514,16 +1543,12 @@ static inline void nvme_release_cmb(struct nvme_dev *dev)
}
}
-static size_t db_bar_size(struct nvme_dev *dev, unsigned nr_io_queues)
-{
- return 4096 + ((nr_io_queues + 1) * 8 * dev->db_stride);
-}
-
static int nvme_setup_io_queues(struct nvme_dev *dev)
{
struct nvme_queue *adminq = dev->queues[0];
struct pci_dev *pdev = to_pci_dev(dev->dev);
- int result, nr_io_queues, size;
+ int result, nr_io_queues;
+ unsigned long size;
nr_io_queues = num_online_cpus();
result = nvme_set_queue_count(&dev->ctrl, &nr_io_queues);
@@ -1542,20 +1567,15 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
nvme_release_cmb(dev);
}
- size = db_bar_size(dev, nr_io_queues);
- if (size > 8192) {
- iounmap(dev->bar);
- do {
- dev->bar = ioremap(pci_resource_start(pdev, 0), size);
- if (dev->bar)
- break;
- if (!--nr_io_queues)
- return -ENOMEM;
- size = db_bar_size(dev, nr_io_queues);
- } while (1);
- dev->dbs = dev->bar + 4096;
- adminq->q_db = dev->dbs;
- }
+ do {
+ size = db_bar_size(dev, nr_io_queues);
+ result = nvme_remap_bar(dev, size);
+ if (!result)
+ break;
+ if (!--nr_io_queues)
+ return -ENOMEM;
+ } while (1);
+ adminq->q_db = dev->dbs;
/* Deregister the admin queue's interrupt */
free_irq(pci_irq_vector(pdev, 0), adminq);
@@ -2061,8 +2081,7 @@ static int nvme_dev_map(struct nvme_dev *dev)
if (pci_request_mem_regions(pdev, "nvme"))
return -ENODEV;
- dev->bar = ioremap(pci_resource_start(pdev, 0), 8192);
- if (!dev->bar)
+ if (nvme_remap_bar(dev, NVME_REG_DBS + PAGE_SIZE))
goto release;
return 0;
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index b625bac..7715be4 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -101,6 +101,7 @@ enum {
NVME_REG_ACQ = 0x0030, /* Admin CQ Base Address */
NVME_REG_CMBLOC = 0x0038, /* Controller Memory Buffer Location */
NVME_REG_CMBSZ = 0x003c, /* Controller Memory Buffer Size */
+ NVME_REG_DBS = 0x1000, /* SQ 0 Tail Doorbell */
};
#define NVME_CAP_MQES(cap) ((cap) & 0xffff)
--
1.9.1
^ permalink raw reply related [flat|nested] 2+ messages in thread
* Re: [PATCH v2] nvme/pci: remap BAR0 to cover admin CQ doorbell for large stride
2017-05-22 5:09 [PATCH v2] nvme/pci: remap BAR0 to cover admin CQ doorbell for large stride Xu Yu
@ 2017-05-23 7:24 ` Christoph Hellwig
0 siblings, 0 replies; 2+ messages in thread
From: Christoph Hellwig @ 2017-05-23 7:24 UTC (permalink / raw)
To: Xu Yu
Cc: linux-nvme, linux-kernel, keith.busch, axboe, hch, sagi, haozhong.zhang
On Mon, May 22, 2017 at 01:09:21PM +0800, Xu Yu wrote:
> The existing driver initially maps 8192 bytes of BAR0 which is
> intended to cover doorbells of admin SQ and CQ. However, if a
> large stride, e.g. 10, is used, the doorbell of admin CQ will
> be out of 8192 bytes. Consequently, a page fault will be raised
> when the admin CQ doorbell is accessed in nvme_configure_admin_queue().
>
> This patch fixes this issue by remapping BAR0 before accessing
> admin CQ doorbell if the initial mapping is not enough.
>
> Signed-off-by: Xu Yu <yu.a.xu@intel.com>
> ---
> Changes since v1:
> * Move the bar (re)mapping logic in nvme_dev_map(), nvme_configure_admin_queue()
> and nvme_setup_io_queues() to a new helper nvme_remap_bar().
> * Replace several magic numbers by PAGE_SIZE and the new NVME_REG_DBS.
> ---
> drivers/nvme/host/pci.c | 63 ++++++++++++++++++++++++++++++++-----------------
> include/linux/nvme.h | 1 +
> 2 files changed, 42 insertions(+), 22 deletions(-)
>
> diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
> index 9d4640a..84a254a 100644
> --- a/drivers/nvme/host/pci.c
> +++ b/drivers/nvme/host/pci.c
> @@ -91,6 +91,7 @@ struct nvme_dev {
> int q_depth;
> u32 db_stride;
> void __iomem *bar;
> + unsigned long bar_mapped_size;
> struct work_struct reset_work;
> struct work_struct remove_work;
> struct timer_list watchdog_timer;
> @@ -1316,6 +1317,30 @@ static int nvme_alloc_admin_tags(struct nvme_dev *dev)
> return 0;
> }
>
> +static unsigned long db_bar_size(struct nvme_dev *dev, unsigned nr_io_queues)
> +{
> + return NVME_REG_DBS + ((nr_io_queues + 1) * 8 * dev->db_stride);
> +}
> +
> +static int nvme_remap_bar(struct nvme_dev *dev, unsigned long size)
> +{
> + struct pci_dev *pdev = to_pci_dev(dev->dev);
> +
> + if (size <= dev->bar_mapped_size)
> + return 0;
Can we add a sanity check that size is <= pci_resource_len() somewhere?
> - dev->bar = ioremap(pci_resource_start(pdev, 0), 8192);
> - if (!dev->bar)
> + if (nvme_remap_bar(dev, NVME_REG_DBS + PAGE_SIZE))
page size isn't correct here, we had a constant 4096 which might be
smaller than page size. A comment on why we chose 4096 might
be useful, but the reason for it might be lost in history..
Otherwise this looks great to me.
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2017-05-23 7:24 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-05-22 5:09 [PATCH v2] nvme/pci: remap BAR0 to cover admin CQ doorbell for large stride Xu Yu
2017-05-23 7:24 ` Christoph Hellwig
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).