linux-nvme.lists.infradead.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 1/3] nvme-pci: clear shadow doorbell memory on resets
@ 2020-04-27 23:52 Keith Busch
  2020-04-27 23:52 ` [PATCH 2/3] nvme-pci: remove cached shadow doorbell offsets Keith Busch
                   ` (3 more replies)
  0 siblings, 4 replies; 16+ messages in thread
From: Keith Busch @ 2020-04-27 23:52 UTC (permalink / raw)
  To: linux-nvme, hch, sagi; +Cc: Keith Busch

The host memory doorbell and event buffers need to be initialized on
each reset so the driver doesn't observe stale values from the previous
instantiation.

Signed-off-by: Keith Busch <kbusch@kernel.org>
---
 drivers/nvme/host/pci.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index cf386c84588b..d388fff9c358 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -228,8 +228,11 @@ static int nvme_dbbuf_dma_alloc(struct nvme_dev *dev)
 {
 	unsigned int mem_size = nvme_dbbuf_size(dev->db_stride);
 
-	if (dev->dbbuf_dbs)
+	if (dev->dbbuf_dbs) {
+		memset(dev->dbbuf_dbs, 0, mem_size);
+		memset(dev->dbbuf_eis, 0, mem_size);
 		return 0;
+	}
 
 	dev->dbbuf_dbs = dma_alloc_coherent(dev->dev, mem_size,
 					    &dev->dbbuf_dbs_dma_addr,
-- 
2.24.1


_______________________________________________
linux-nvme mailing list
linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH 2/3] nvme-pci: remove cached shadow doorbell offsets
  2020-04-27 23:52 [PATCH 1/3] nvme-pci: clear shadow doorbell memory on resets Keith Busch
@ 2020-04-27 23:52 ` Keith Busch
  2020-04-30  6:36   ` Dongli Zhang
                     ` (2 more replies)
  2020-04-27 23:52 ` [PATCH 3/3] nvme-pci: reshuffle nvme_queue members Keith Busch
                   ` (2 subsequent siblings)
  3 siblings, 3 replies; 16+ messages in thread
From: Keith Busch @ 2020-04-27 23:52 UTC (permalink / raw)
  To: linux-nvme, hch, sagi; +Cc: Keith Busch

Real nvme hardware doesn't support the shadow doorbell feature. Remove
the overhead of saving this special feature and instead obtain the
address offsets from the device providing it.

And when this feature is in use, the specification requires all queue
updates use this mechanism, so don't treat the admin queue
differently.

Signed-off-by: Keith Busch <kbusch@kernel.org>
---
 drivers/nvme/host/pci.c | 86 ++++++++++++++++++++---------------------
 1 file changed, 41 insertions(+), 45 deletions(-)

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index d388fff9c358..6d91fc5ee4d1 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -182,10 +182,6 @@ struct nvme_queue {
 #define NVMEQ_SQ_CMB		1
 #define NVMEQ_DELETE_ERROR	2
 #define NVMEQ_POLLED		3
-	u32 *dbbuf_sq_db;
-	u32 *dbbuf_cq_db;
-	u32 *dbbuf_sq_ei;
-	u32 *dbbuf_cq_ei;
 	struct completion delete_done;
 };
 
@@ -268,18 +264,6 @@ static void nvme_dbbuf_dma_free(struct nvme_dev *dev)
 	}
 }
 
-static void nvme_dbbuf_init(struct nvme_dev *dev,
-			    struct nvme_queue *nvmeq, int qid)
-{
-	if (!dev->dbbuf_dbs || !qid)
-		return;
-
-	nvmeq->dbbuf_sq_db = &dev->dbbuf_dbs[sq_idx(qid, dev->db_stride)];
-	nvmeq->dbbuf_cq_db = &dev->dbbuf_dbs[cq_idx(qid, dev->db_stride)];
-	nvmeq->dbbuf_sq_ei = &dev->dbbuf_eis[sq_idx(qid, dev->db_stride)];
-	nvmeq->dbbuf_cq_ei = &dev->dbbuf_eis[cq_idx(qid, dev->db_stride)];
-}
-
 static void nvme_dbbuf_set(struct nvme_dev *dev)
 {
 	struct nvme_command c;
@@ -299,7 +283,7 @@ static void nvme_dbbuf_set(struct nvme_dev *dev)
 	}
 }
 
-static inline int nvme_dbbuf_need_event(u16 event_idx, u16 new_idx, u16 old)
+static inline bool nvme_dbbuf_need_event(u16 event_idx, u16 new_idx, u16 old)
 {
 	return (u16)(new_idx - event_idx - 1) < (u16)(new_idx - old);
 }
@@ -308,31 +292,48 @@ static inline int nvme_dbbuf_need_event(u16 event_idx, u16 new_idx, u16 old)
 static bool nvme_dbbuf_update_and_check_event(u16 value, u32 *dbbuf_db,
 					      volatile u32 *dbbuf_ei)
 {
-	if (dbbuf_db) {
-		u16 old_value;
+	u16 old_value;
 
-		/*
-		 * Ensure that the queue is written before updating
-		 * the doorbell in memory
-		 */
-		wmb();
+	/*
+	 * Ensure that the queue is written before updating the doorbell in
+	 * memory
+	 */
+	wmb();
 
-		old_value = *dbbuf_db;
-		*dbbuf_db = value;
+	old_value = *dbbuf_db;
+	*dbbuf_db = value;
 
-		/*
-		 * Ensure that the doorbell is updated before reading the event
-		 * index from memory.  The controller needs to provide similar
-		 * ordering to ensure the envent index is updated before reading
-		 * the doorbell.
-		 */
-		mb();
+	/*
+	 * Ensure that the doorbell is updated before reading the event index
+	 * from memory.  The controller needs to provide similar ordering to
+	 * ensure the event index is updated before reading the doorbell.
+	 */
+	mb();
+	return nvme_dbbuf_need_event(*dbbuf_ei, value, old_value);
+}
 
-		if (!nvme_dbbuf_need_event(*dbbuf_ei, value, old_value))
-			return false;
-	}
+static bool nvme_dbbuf_update_sq(struct nvme_queue *nvmeq)
+{
+	struct nvme_dev *dev = nvmeq->dev;
 
-	return true;
+	if (!dev->dbbuf_dbs)
+		return true;
+
+	return nvme_dbbuf_update_and_check_event(nvmeq->sq_tail,
+		&dev->dbbuf_dbs[sq_idx(nvmeq->qid, dev->db_stride)],
+		&dev->dbbuf_eis[sq_idx(nvmeq->qid, dev->db_stride)]);
+}
+
+static bool nvme_dbbuf_update_cq(struct nvme_queue *nvmeq)
+{
+	struct nvme_dev *dev = nvmeq->dev;
+
+	if (!dev->dbbuf_dbs)
+		return true;
+
+	return nvme_dbbuf_update_and_check_event(nvmeq->cq_head,
+		&dev->dbbuf_dbs[cq_idx(nvmeq->qid, dev->db_stride)],
+		&dev->dbbuf_eis[cq_idx(nvmeq->qid, dev->db_stride)]);
 }
 
 /*
@@ -450,8 +451,7 @@ static int nvme_pci_map_queues(struct blk_mq_tag_set *set)
 
 static inline void nvme_write_sq_db(struct nvme_queue *nvmeq)
 {
-	if (nvme_dbbuf_update_and_check_event(nvmeq->sq_tail,
-			nvmeq->dbbuf_sq_db, nvmeq->dbbuf_sq_ei))
+	if (nvme_dbbuf_update_sq(nvmeq))
 		writel(nvmeq->sq_tail, nvmeq->q_db);
 }
 
@@ -918,11 +918,8 @@ static inline bool nvme_cqe_pending(struct nvme_queue *nvmeq)
 
 static inline void nvme_ring_cq_doorbell(struct nvme_queue *nvmeq)
 {
-	u16 head = nvmeq->cq_head;
-
-	if (nvme_dbbuf_update_and_check_event(head, nvmeq->dbbuf_cq_db,
-					      nvmeq->dbbuf_cq_ei))
-		writel(head, nvmeq->q_db + nvmeq->dev->db_stride);
+	if (nvme_dbbuf_update_cq(nvmeq))
+		writel(nvmeq->cq_head, nvmeq->q_db + nvmeq->dev->db_stride);
 }
 
 static inline struct blk_mq_tags *nvme_queue_tagset(struct nvme_queue *nvmeq)
@@ -1483,7 +1480,6 @@ static void nvme_init_queue(struct nvme_queue *nvmeq, u16 qid)
 	nvmeq->cq_phase = 1;
 	nvmeq->q_db = &dev->dbs[qid * 2 * dev->db_stride];
 	memset((void *)nvmeq->cqes, 0, CQ_SIZE(nvmeq));
-	nvme_dbbuf_init(dev, nvmeq, qid);
 	dev->online_queues++;
 	wmb(); /* ensure the first interrupt sees the initialization */
 }
-- 
2.24.1


_______________________________________________
linux-nvme mailing list
linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH 3/3] nvme-pci: reshuffle nvme_queue members
  2020-04-27 23:52 [PATCH 1/3] nvme-pci: clear shadow doorbell memory on resets Keith Busch
  2020-04-27 23:52 ` [PATCH 2/3] nvme-pci: remove cached shadow doorbell offsets Keith Busch
@ 2020-04-27 23:52 ` Keith Busch
  2020-05-01 12:57   ` Christoph Hellwig
  2020-05-01 12:49 ` [PATCH 1/3] nvme-pci: clear shadow doorbell memory on resets Christoph Hellwig
  2021-10-04  9:35 ` John Levon
  3 siblings, 1 reply; 16+ messages in thread
From: Keith Busch @ 2020-04-27 23:52 UTC (permalink / raw)
  To: linux-nvme, hch, sagi; +Cc: Keith Busch

All the io path hot members can fit within the first 64-bytes, which is
a common cacheline. Order the members of this struct so those members
fit in and align to that window.

Signed-off-by: Keith Busch <kbusch@kernel.org>
---
 drivers/nvme/host/pci.c | 22 ++++++++++++----------
 1 file changed, 12 insertions(+), 10 deletions(-)

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 6d91fc5ee4d1..c16cf9fd4483 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -164,12 +164,14 @@ struct nvme_queue {
 	struct nvme_dev *dev;
 	spinlock_t sq_lock;
 	void *sq_cmds;
-	 /* only used for poll queues: */
-	spinlock_t cq_poll_lock ____cacheline_aligned_in_smp;
 	struct nvme_completion *cqes;
-	dma_addr_t sq_dma_addr;
-	dma_addr_t cq_dma_addr;
 	u32 __iomem *q_db;
+	unsigned long flags;
+#define NVMEQ_ENABLED		0
+#define NVMEQ_SQ_CMB		1
+#define NVMEQ_DELETE_ERROR	2
+#define NVMEQ_POLLED		3
+
 	u16 q_depth;
 	u16 cq_vector;
 	u16 sq_tail;
@@ -177,13 +179,13 @@ struct nvme_queue {
 	u16 qid;
 	u8 cq_phase;
 	u8 sqes;
-	unsigned long flags;
-#define NVMEQ_ENABLED		0
-#define NVMEQ_SQ_CMB		1
-#define NVMEQ_DELETE_ERROR	2
-#define NVMEQ_POLLED		3
+
+	/* only used for poll queues: */
+	spinlock_t cq_poll_lock;
+	dma_addr_t sq_dma_addr;
+	dma_addr_t cq_dma_addr;
 	struct completion delete_done;
-};
+} ____cacheline_aligned_in_smp;
 
 /*
  * The nvme_iod describes the data in an I/O.
-- 
2.24.1


_______________________________________________
linux-nvme mailing list
linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/3] nvme-pci: remove cached shadow doorbell offsets
  2020-04-27 23:52 ` [PATCH 2/3] nvme-pci: remove cached shadow doorbell offsets Keith Busch
@ 2020-04-30  6:36   ` Dongli Zhang
  2020-04-30 19:07     ` Keith Busch
  2020-05-01 12:54   ` Christoph Hellwig
  2021-10-04  9:42   ` John Levon
  2 siblings, 1 reply; 16+ messages in thread
From: Dongli Zhang @ 2020-04-30  6:36 UTC (permalink / raw)
  To: Keith Busch, linux-nvme, hch, sagi

Hi Keith,

On 4/27/20 4:52 PM, Keith Busch wrote:
> Real nvme hardware doesn't support the shadow doorbell feature. Remove

I used to test with nvme emulated by qemu and NVME_CTRL_OACS_DBBUF_SUPP is not
set yet.

Would you please share which emulator would support NVME_CTRL_OACS_DBBUF_SUPP?

Thank you very much!

Dongli Zhang

_______________________________________________
linux-nvme mailing list
linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/3] nvme-pci: remove cached shadow doorbell offsets
  2020-04-30  6:36   ` Dongli Zhang
@ 2020-04-30 19:07     ` Keith Busch
  0 siblings, 0 replies; 16+ messages in thread
From: Keith Busch @ 2020-04-30 19:07 UTC (permalink / raw)
  To: Dongli Zhang; +Cc: hch, linux-nvme, sagi

On Wed, Apr 29, 2020 at 11:36:52PM -0700, Dongli Zhang wrote:
> Hi Keith,
> 
> On 4/27/20 4:52 PM, Keith Busch wrote:
> > Real nvme hardware doesn't support the shadow doorbell feature. Remove
> 
> I used to test with nvme emulated by qemu and NVME_CTRL_OACS_DBBUF_SUPP is not
> set yet.
> 
> Would you please share which emulator would support NVME_CTRL_OACS_DBBUF_SUPP?

Sorry, there is no freely available implementation that I'm aware of.
I think qemu could benefit from this feature, and there had been some
implementation attempts, but nothing that ever made it upstream.

_______________________________________________
linux-nvme mailing list
linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 1/3] nvme-pci: clear shadow doorbell memory on resets
  2020-04-27 23:52 [PATCH 1/3] nvme-pci: clear shadow doorbell memory on resets Keith Busch
  2020-04-27 23:52 ` [PATCH 2/3] nvme-pci: remove cached shadow doorbell offsets Keith Busch
  2020-04-27 23:52 ` [PATCH 3/3] nvme-pci: reshuffle nvme_queue members Keith Busch
@ 2020-05-01 12:49 ` Christoph Hellwig
  2021-10-04  9:35 ` John Levon
  3 siblings, 0 replies; 16+ messages in thread
From: Christoph Hellwig @ 2020-05-01 12:49 UTC (permalink / raw)
  To: Keith Busch; +Cc: hch, linux-nvme, sagi

On Mon, Apr 27, 2020 at 04:52:41PM -0700, Keith Busch wrote:
> The host memory doorbell and event buffers need to be initialized on
> each reset so the driver doesn't observe stale values from the previous
> instantiation.
> 
> Signed-off-by: Keith Busch <kbusch@kernel.org>
> ---
>  drivers/nvme/host/pci.c | 5 ++++-
>  1 file changed, 4 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
> index cf386c84588b..d388fff9c358 100644
> --- a/drivers/nvme/host/pci.c
> +++ b/drivers/nvme/host/pci.c
> @@ -228,8 +228,11 @@ static int nvme_dbbuf_dma_alloc(struct nvme_dev *dev)
>  {
>  	unsigned int mem_size = nvme_dbbuf_size(dev->db_stride);
>  
> -	if (dev->dbbuf_dbs)
> +	if (dev->dbbuf_dbs) {
> +		memset(dev->dbbuf_dbs, 0, mem_size);
> +		memset(dev->dbbuf_eis, 0, mem_size);
>  		return 0;

Can you throw in a comment why the memory is cleared?


_______________________________________________
linux-nvme mailing list
linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/3] nvme-pci: remove cached shadow doorbell offsets
  2020-04-27 23:52 ` [PATCH 2/3] nvme-pci: remove cached shadow doorbell offsets Keith Busch
  2020-04-30  6:36   ` Dongli Zhang
@ 2020-05-01 12:54   ` Christoph Hellwig
  2021-10-04  9:42   ` John Levon
  2 siblings, 0 replies; 16+ messages in thread
From: Christoph Hellwig @ 2020-05-01 12:54 UTC (permalink / raw)
  To: Keith Busch; +Cc: hch, linux-nvme, sagi

This doesn't actually apply.  Some comments below anyway:

> +static bool nvme_dbbuf_update_sq(struct nvme_queue *nvmeq)
> +{
> +	struct nvme_dev *dev = nvmeq->dev;
>  
> +	if (!dev->dbbuf_dbs)
> +		return true;

I'd rather have this check in the caller.  That makes it both more
obvious to read, and avoids a function call for the fast path.

> +static bool nvme_dbbuf_update_cq(struct nvme_queue *nvmeq)
> +{
> +	struct nvme_dev *dev = nvmeq->dev;
> +
> +	if (!dev->dbbuf_dbs)
> +		return true;

Same here.

>  static inline void nvme_write_sq_db(struct nvme_queue *nvmeq)
>  {
> +	if (nvme_dbbuf_update_sq(nvmeq))
>  		writel(nvmeq->sq_tail, nvmeq->q_db);
>  }

It might be worth to just open code this in the two callers, even
with the additional check I suggested above.

>  
> @@ -918,11 +918,8 @@ static inline bool nvme_cqe_pending(struct nvme_queue *nvmeq)
>  
>  static inline void nvme_ring_cq_doorbell(struct nvme_queue *nvmeq)
>  {
> +	if (nvme_dbbuf_update_cq(nvmeq))
> +		writel(nvmeq->cq_head, nvmeq->q_db + nvmeq->dev->db_stride);

Same here for the only caller.

_______________________________________________
linux-nvme mailing list
linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 3/3] nvme-pci: reshuffle nvme_queue members
  2020-04-27 23:52 ` [PATCH 3/3] nvme-pci: reshuffle nvme_queue members Keith Busch
@ 2020-05-01 12:57   ` Christoph Hellwig
  2020-05-01 15:08     ` Keith Busch
  0 siblings, 1 reply; 16+ messages in thread
From: Christoph Hellwig @ 2020-05-01 12:57 UTC (permalink / raw)
  To: Keith Busch; +Cc: hch, linux-nvme, sagi

On Mon, Apr 27, 2020 at 04:52:43PM -0700, Keith Busch wrote:
> All the io path hot members can fit within the first 64-bytes, which is
> a common cacheline. Order the members of this struct so those members
> fit in and align to that window.

Do we even want to share a cacheline for the submission vs completion
path?  I know other places try to keep the deliberately separate.

> +	/* only used for poll queues: */
> +	spinlock_t cq_poll_lock;
> +	dma_addr_t sq_dma_addr;
> +	dma_addr_t cq_dma_addr;
>  	struct completion delete_done;

We probably want a comment before the last three that this is only
used during queue deletion.  Also cq_poll_lock very much is in the
hot path for polled I/O, so I'd rather keep it with other CQ bits.

_______________________________________________
linux-nvme mailing list
linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 3/3] nvme-pci: reshuffle nvme_queue members
  2020-05-01 12:57   ` Christoph Hellwig
@ 2020-05-01 15:08     ` Keith Busch
  2020-05-01 15:12       ` Christoph Hellwig
  2020-05-04 18:18       ` Keith Busch
  0 siblings, 2 replies; 16+ messages in thread
From: Keith Busch @ 2020-05-01 15:08 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: sagi, linux-nvme

On Fri, May 01, 2020 at 02:57:04PM +0200, Christoph Hellwig wrote:
> On Mon, Apr 27, 2020 at 04:52:43PM -0700, Keith Busch wrote:
> > All the io path hot members can fit within the first 64-bytes, which is
> > a common cacheline. Order the members of this struct so those members
> > fit in and align to that window.
> 
> Do we even want to share a cacheline for the submission vs completion
> path?  I know other places try to keep the deliberately separate.

We usually complete on the same CPU that submitted, so it appears
beneficial to fit both sides in the same line. I'll try it out both
ways, though I don't think I'll be able to measure a difference.
 
> > +	/* only used for poll queues: */
> > +	spinlock_t cq_poll_lock;
> > +	dma_addr_t sq_dma_addr;
> > +	dma_addr_t cq_dma_addr;
> >  	struct completion delete_done;
> 
> We probably want a comment before the last three that this is only
> used during queue deletion.  Also cq_poll_lock very much is in the
> hot path for polled I/O, so I'd rather keep it with other CQ bits.

Yeah, I moved the cq lock so 'flags' could fit since we're still using
that in the submission path. We should be able to get rid of that hack
at some point, so I'll redo this as you're suggesting.

_______________________________________________
linux-nvme mailing list
linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 3/3] nvme-pci: reshuffle nvme_queue members
  2020-05-01 15:08     ` Keith Busch
@ 2020-05-01 15:12       ` Christoph Hellwig
  2020-05-01 15:23         ` Keith Busch
  2020-05-04 18:18       ` Keith Busch
  1 sibling, 1 reply; 16+ messages in thread
From: Christoph Hellwig @ 2020-05-01 15:12 UTC (permalink / raw)
  To: Keith Busch; +Cc: Christoph Hellwig, linux-nvme, sagi

On Sat, May 02, 2020 at 12:08:15AM +0900, Keith Busch wrote:
> > > +	/* only used for poll queues: */
> > > +	spinlock_t cq_poll_lock;
> > > +	dma_addr_t sq_dma_addr;
> > > +	dma_addr_t cq_dma_addr;
> > >  	struct completion delete_done;
> > 
> > We probably want a comment before the last three that this is only
> > used during queue deletion.  Also cq_poll_lock very much is in the
> > hot path for polled I/O, so I'd rather keep it with other CQ bits.
> 
> Yeah, I moved the cq lock so 'flags' could fit since we're still using
> that in the submission path. We should be able to get rid of that hack
> at some point, so I'll redo this as you're suggesting.

We could easily kill the qid field by using the same trick as
the fabrics drivers.  But that is only 16-bits, does that give you
enough space?

_______________________________________________
linux-nvme mailing list
linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 3/3] nvme-pci: reshuffle nvme_queue members
  2020-05-01 15:12       ` Christoph Hellwig
@ 2020-05-01 15:23         ` Keith Busch
  0 siblings, 0 replies; 16+ messages in thread
From: Keith Busch @ 2020-05-01 15:23 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: sagi, linux-nvme

On Fri, May 01, 2020 at 05:12:46PM +0200, Christoph Hellwig wrote:
> On Sat, May 02, 2020 at 12:08:15AM +0900, Keith Busch wrote:
> > > > +	/* only used for poll queues: */
> > > > +	spinlock_t cq_poll_lock;
> > > > +	dma_addr_t sq_dma_addr;
> > > > +	dma_addr_t cq_dma_addr;
> > > >  	struct completion delete_done;
> > > 
> > > We probably want a comment before the last three that this is only
> > > used during queue deletion.  Also cq_poll_lock very much is in the
> > > hot path for polled I/O, so I'd rather keep it with other CQ bits.
> > 
> > Yeah, I moved the cq lock so 'flags' could fit since we're still using
> > that in the submission path. We should be able to get rid of that hack
> > at some point, so I'll redo this as you're suggesting.
> 
> We could easily kill the qid field by using the same trick as
> the fabrics drivers.  But that is only 16-bits, does that give you
> enough space?

Yes, killing qid and moving the cq_vector (which is not used in the io
path) will let everything fit. Thanks for the suggestion.

_______________________________________________
linux-nvme mailing list
linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 3/3] nvme-pci: reshuffle nvme_queue members
  2020-05-01 15:08     ` Keith Busch
  2020-05-01 15:12       ` Christoph Hellwig
@ 2020-05-04 18:18       ` Keith Busch
  1 sibling, 0 replies; 16+ messages in thread
From: Keith Busch @ 2020-05-04 18:18 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: sagi, linux-nvme

On Sat, May 02, 2020 at 12:08:15AM +0900, Keith Busch wrote:
> On Fri, May 01, 2020 at 02:57:04PM +0200, Christoph Hellwig wrote:
> > On Mon, Apr 27, 2020 at 04:52:43PM -0700, Keith Busch wrote:
> > > All the io path hot members can fit within the first 64-bytes, which is
> > > a common cacheline. Order the members of this struct so those members
> > > fit in and align to that window.
> > 
> > Do we even want to share a cacheline for the submission vs completion
> > path?  I know other places try to keep the deliberately separate.
> 
> We usually complete on the same CPU that submitted, so it appears
> beneficial to fit both sides in the same line. I'll try it out both
> ways, though I don't think I'll be able to measure a difference.

If there are more cpus than queues, there is a real benefit to
separating submit and complete into different cache lines, otherwise
they invalidate each other when submissions occur on cpus different than
completions. The extra space provides more flexibility arranging the
struct, so I'll experiment with that a bit more.

_______________________________________________
linux-nvme mailing list
linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 1/3] nvme-pci: clear shadow doorbell memory on resets
  2020-04-27 23:52 [PATCH 1/3] nvme-pci: clear shadow doorbell memory on resets Keith Busch
                   ` (2 preceding siblings ...)
  2020-05-01 12:49 ` [PATCH 1/3] nvme-pci: clear shadow doorbell memory on resets Christoph Hellwig
@ 2021-10-04  9:35 ` John Levon
  2021-10-06 12:07   ` Keith Busch
  3 siblings, 1 reply; 16+ messages in thread
From: John Levon @ 2021-10-04  9:35 UTC (permalink / raw)
  To: Keith Busch; +Cc: linux-nvme, hch, sagi

On Mon, Apr 27, 2020 at 04:52:41PM -0700, Keith Busch wrote:

> The host memory doorbell and event buffers need to be initialized on
> each reset so the driver doesn't observe stale values from the previous
> instantiation.
> 
> Signed-off-by: Keith Busch <kbusch@kernel.org>
> ---
>  drivers/nvme/host/pci.c | 5 ++++-
>  1 file changed, 4 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
> index cf386c84588b..d388fff9c358 100644
> --- a/drivers/nvme/host/pci.c
> +++ b/drivers/nvme/host/pci.c
> @@ -228,8 +228,11 @@ static int nvme_dbbuf_dma_alloc(struct nvme_dev *dev)
>  {
>  	unsigned int mem_size = nvme_dbbuf_size(dev->db_stride);
>  
> -	if (dev->dbbuf_dbs)
> +	if (dev->dbbuf_dbs) {
> +		memset(dev->dbbuf_dbs, 0, mem_size);
> +		memset(dev->dbbuf_eis, 0, mem_size);
>  		return 0;
> +	}
>  
>  	dev->dbbuf_dbs = dma_alloc_coherent(dev->dev, mem_size,
>  					    &dev->dbbuf_dbs_dma_addr,

Hi Keith, we came across this issue recently, and we just found this above
patch. Was there any specific reason it was never merged?

thanks
john

_______________________________________________
Linux-nvme mailing list
Linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/3] nvme-pci: remove cached shadow doorbell offsets
  2020-04-27 23:52 ` [PATCH 2/3] nvme-pci: remove cached shadow doorbell offsets Keith Busch
  2020-04-30  6:36   ` Dongli Zhang
  2020-05-01 12:54   ` Christoph Hellwig
@ 2021-10-04  9:42   ` John Levon
  2 siblings, 0 replies; 16+ messages in thread
From: John Levon @ 2021-10-04  9:42 UTC (permalink / raw)
  To: Keith Busch; +Cc: linux-nvme, hch, sagi

On Mon, Apr 27, 2020 at 04:52:42PM -0700, Keith Busch wrote:

> And when this feature is in use, the specification requires all queue
> updates use this mechanism, so don't treat the admin queue
> differently.

Also came across this patch of yours. We were wondering why Linux isn't spec
compliant for the admin queues. But this would cause implementation difficulties
for a controller, right? Currently we presume that the admin queue's shadow
doorbells aren't valid, so always look at BAR0 location. It's unclear what we'd
do if this change were made in Linux, presuming we need to support older Linux
versions. Some hack that observes a non-zero shadow value and presumes the
admin queue is using shadow doorbells?

Thoughts?

thanks
john

_______________________________________________
Linux-nvme mailing list
Linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 1/3] nvme-pci: clear shadow doorbell memory on resets
  2021-10-04  9:35 ` John Levon
@ 2021-10-06 12:07   ` Keith Busch
  2021-10-06 16:05     ` John Levon
  0 siblings, 1 reply; 16+ messages in thread
From: Keith Busch @ 2021-10-06 12:07 UTC (permalink / raw)
  To: John Levon; +Cc: linux-nvme, hch, sagi

On Mon, Oct 04, 2021 at 09:35:04AM +0000, John Levon wrote:
> On Mon, Apr 27, 2020 at 04:52:41PM -0700, Keith Busch wrote:
> 
> > The host memory doorbell and event buffers need to be initialized on
> > each reset so the driver doesn't observe stale values from the previous
> > instantiation.
> > 
> > Signed-off-by: Keith Busch <kbusch@kernel.org>
> > ---
> >  drivers/nvme/host/pci.c | 5 ++++-
> >  1 file changed, 4 insertions(+), 1 deletion(-)
> > 
> > diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
> > index cf386c84588b..d388fff9c358 100644
> > --- a/drivers/nvme/host/pci.c
> > +++ b/drivers/nvme/host/pci.c
> > @@ -228,8 +228,11 @@ static int nvme_dbbuf_dma_alloc(struct nvme_dev *dev)
> >  {
> >  	unsigned int mem_size = nvme_dbbuf_size(dev->db_stride);
> >  
> > -	if (dev->dbbuf_dbs)
> > +	if (dev->dbbuf_dbs) {
> > +		memset(dev->dbbuf_dbs, 0, mem_size);
> > +		memset(dev->dbbuf_eis, 0, mem_size);
> >  		return 0;
> > +	}
> >  
> >  	dev->dbbuf_dbs = dma_alloc_coherent(dev->dev, mem_size,
> >  					    &dev->dbbuf_dbs_dma_addr,
> 
> Hi Keith, we came across this issue recently, and we just found this above
> patch. Was there any specific reason it was never merged?

I don't recall why this wasn't merged. I can't find this series in my
tree anymore, and it also appears that the mailing list has lost the
original patch set. Do you happen to have the rest of the patch emails?
If so, could you possibly send them to me?

_______________________________________________
Linux-nvme mailing list
Linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 1/3] nvme-pci: clear shadow doorbell memory on resets
  2021-10-06 12:07   ` Keith Busch
@ 2021-10-06 16:05     ` John Levon
  0 siblings, 0 replies; 16+ messages in thread
From: John Levon @ 2021-10-06 16:05 UTC (permalink / raw)
  To: Keith Busch; +Cc: linux-nvme, hch, sagi

On Wed, Oct 06, 2021 at 05:07:34AM -0700, Keith Busch wrote:

> On Mon, Oct 04, 2021 at 09:35:04AM +0000, John Levon wrote:
> > On Mon, Apr 27, 2020 at 04:52:41PM -0700, Keith Busch wrote:
> > 
> > > The host memory doorbell and event buffers need to be initialized on
> > > each reset so the driver doesn't observe stale values from the previous
> > > instantiation.
> > > 
> > > Signed-off-by: Keith Busch <kbusch@kernel.org>
> > > ---
> > >  drivers/nvme/host/pci.c | 5 ++++-
> > >  1 file changed, 4 insertions(+), 1 deletion(-)
> > > 
> > > diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
> > > index cf386c84588b..d388fff9c358 100644
> > > --- a/drivers/nvme/host/pci.c
> > > +++ b/drivers/nvme/host/pci.c
> > > @@ -228,8 +228,11 @@ static int nvme_dbbuf_dma_alloc(struct nvme_dev *dev)
> > >  {
> > >  	unsigned int mem_size = nvme_dbbuf_size(dev->db_stride);
> > >  
> > > -	if (dev->dbbuf_dbs)
> > > +	if (dev->dbbuf_dbs) {
> > > +		memset(dev->dbbuf_dbs, 0, mem_size);
> > > +		memset(dev->dbbuf_eis, 0, mem_size);
> > >  		return 0;
> > > +	}
> > >  
> > >  	dev->dbbuf_dbs = dma_alloc_coherent(dev->dev, mem_size,
> > >  					    &dev->dbbuf_dbs_dma_addr,
> > 
> > Hi Keith, we came across this issue recently, and we just found this above
> > patch. Was there any specific reason it was never merged?
> 
> I don't recall why this wasn't merged. I can't find this series in my
> tree anymore, and it also appears that the mailing list has lost the
> original patch set. Do you happen to have the rest of the patch emails?
> If so, could you possibly send them to me?

I found them via lore:

https://lore.kernel.org/linux-nvme/20200427235243.2268765-1-kbusch@kernel.org/

regards
john

_______________________________________________
Linux-nvme mailing list
Linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme

^ permalink raw reply	[flat|nested] 16+ messages in thread

end of thread, other threads:[~2021-10-06 16:06 UTC | newest]

Thread overview: 16+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-04-27 23:52 [PATCH 1/3] nvme-pci: clear shadow doorbell memory on resets Keith Busch
2020-04-27 23:52 ` [PATCH 2/3] nvme-pci: remove cached shadow doorbell offsets Keith Busch
2020-04-30  6:36   ` Dongli Zhang
2020-04-30 19:07     ` Keith Busch
2020-05-01 12:54   ` Christoph Hellwig
2021-10-04  9:42   ` John Levon
2020-04-27 23:52 ` [PATCH 3/3] nvme-pci: reshuffle nvme_queue members Keith Busch
2020-05-01 12:57   ` Christoph Hellwig
2020-05-01 15:08     ` Keith Busch
2020-05-01 15:12       ` Christoph Hellwig
2020-05-01 15:23         ` Keith Busch
2020-05-04 18:18       ` Keith Busch
2020-05-01 12:49 ` [PATCH 1/3] nvme-pci: clear shadow doorbell memory on resets Christoph Hellwig
2021-10-04  9:35 ` John Levon
2021-10-06 12:07   ` Keith Busch
2021-10-06 16:05     ` John Levon

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).