linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
To: linux-nvme@lists.infradead.org
Cc: linux-kernel@vger.kernel.org, Paul Pawlowski <paul@mrarm.io>,
	Jens Axboe <axboe@fb.com>, Keith Busch <kbusch@kernel.org>,
	Christoph Hellwig <hch@lst.de>,
	Minwoo Im <minwoo.im.dev@gmail.com>
Subject: [PATCH v2] nvme-pci: Support shared tags across queues for Apple 2018 controllers
Date: Fri, 19 Jul 2019 14:36:13 +1000	[thread overview]
Message-ID: <f19ac710b4dc28fb3b59ef11bd06d341bc939f3d.camel@kernel.crashing.org> (raw)

Another issue with the Apple T2 based 2018 controllers seems to be
that they blow up (and shut the machine down) if there's a tag
collision between the IO queue and the Admin queue.

My suspicion is that they use our tags for their internal tracking
and don't mix them with the queue id. They also seem to not like
when tags go beyond the IO queue depth, ie 128 tags.

This adds a quirk that offsets all the tags in the IO queue by 32
to avoid those collisions. It also limits the number of IO queues
to 1 since the code wouldn't otherwise make sense (the device
supports only one queue anyway, but better safe than sorry) and
reduces the size of the IO queue.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---

Note: One thing I noticed is how we have nvme_completion as volatile.

I don't think we really need that, it's forcing the compiler to constantly
reload things which makes no sense once we have established that an
entry is valid.

And since we have a data & control dependency from nvme_cqe_pending(),
we know that reading the CQE is going to depend on it being valid. I
don't really see what volatile is buying us here other than cargo culting.

Cheers,
Ben.

 drivers/nvme/host/nvme.h |  5 ++++
 drivers/nvme/host/pci.c  | 52 +++++++++++++++++++++++++++++++++-------
 2 files changed, 49 insertions(+), 8 deletions(-)

diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index ced0e0a7e039..7c6de398de7d 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -102,6 +102,11 @@ enum nvme_quirks {
 	 * Use non-standard 128 bytes SQEs.
 	 */
 	NVME_QUIRK_128_BYTES_SQES		= (1 << 11),
+
+	/*
+	 * Prevent tag overlap between queues
+	 */
+	NVME_QUIRK_SHARED_TAGS			= (1 << 12),
 };
 
 /*
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 7088971d4c42..c38e946ad8ca 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -178,6 +178,7 @@ struct nvme_queue {
 	u16 cq_head;
 	u16 last_cq_head;
 	u16 qid;
+	u16 tag_offset;
 	u8 cq_phase;
 	u8 sqes;
 	unsigned long flags;
@@ -490,6 +491,7 @@ static void nvme_submit_cmd(struct nvme_queue *nvmeq, struct nvme_command *cmd,
 			    bool write_sq)
 {
 	spin_lock(&nvmeq->sq_lock);
+	cmd->common.command_id += nvmeq->tag_offset;
 	memcpy(nvmeq->sq_cmds + (nvmeq->sq_tail << nvmeq->sqes),
 	       cmd, sizeof(*cmd));
 	if (++nvmeq->sq_tail == nvmeq->q_depth)
@@ -951,9 +953,10 @@ static inline void nvme_ring_cq_doorbell(struct nvme_queue *nvmeq)
 static inline void nvme_handle_cqe(struct nvme_queue *nvmeq, u16 idx)
 {
 	volatile struct nvme_completion *cqe = &nvmeq->cqes[idx];
+	u16 ctag = cqe->command_id - nvmeq->tag_offset;
 	struct request *req;
 
-	if (unlikely(cqe->command_id >= nvmeq->q_depth)) {
+	if (unlikely(ctag >= nvmeq->q_depth)) {
 		dev_warn(nvmeq->dev->ctrl.device,
 			"invalid id %d completed on queue %d\n",
 			cqe->command_id, le16_to_cpu(cqe->sq_id));
@@ -966,14 +969,13 @@ static inline void nvme_handle_cqe(struct nvme_queue *nvmeq, u16 idx)
 	 * aborts.  We don't even bother to allocate a struct request
 	 * for them but rather special case them here.
 	 */
-	if (unlikely(nvmeq->qid == 0 &&
-			cqe->command_id >= NVME_AQ_BLK_MQ_DEPTH)) {
+	if (unlikely(nvmeq->qid == 0 && ctag >= NVME_AQ_BLK_MQ_DEPTH)) {
 		nvme_complete_async_event(&nvmeq->dev->ctrl,
 				cqe->status, &cqe->result);
 		return;
 	}
 
-	req = blk_mq_tag_to_rq(*nvmeq->tags, cqe->command_id);
+	req = blk_mq_tag_to_rq(*nvmeq->tags, ctag);
 	trace_nvme_sq(req, cqe->sq_head, nvmeq->sq_tail);
 	nvme_end_request(req, cqe->status, cqe->result);
 }
@@ -1004,7 +1006,10 @@ static inline int nvme_process_cq(struct nvme_queue *nvmeq, u16 *start,
 
 	*start = nvmeq->cq_head;
 	while (nvme_cqe_pending(nvmeq)) {
-		if (tag == -1U || nvmeq->cqes[nvmeq->cq_head].command_id == tag)
+		u16 ctag = nvmeq->cqes[nvmeq->cq_head].command_id;
+
+		ctag -= nvmeq->tag_offset;
+		if (tag == -1U || ctag == tag)
 			found++;
 		nvme_update_cq_head(nvmeq);
 	}
@@ -1487,6 +1492,10 @@ static int nvme_alloc_queue(struct nvme_dev *dev, int qid, int depth)
 	nvmeq->qid = qid;
 	dev->ctrl.queue_count++;
 
+	if (qid && (dev->ctrl.quirks & NVME_QUIRK_SHARED_TAGS))
+		nvmeq->tag_offset = NVME_AQ_DEPTH;
+	else
+		nvmeq->tag_offset = 0;
 	return 0;
 
  free_cqdma:
@@ -2106,6 +2115,14 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
 	unsigned long size;
 
 	nr_io_queues = max_io_queues();
+
+	/*
+	 * If tags are shared with admin queue (Apple bug), then
+	 * make sure we only use one queue.
+	 */
+	if (dev->ctrl.quirks & NVME_QUIRK_SHARED_TAGS)
+		nr_io_queues = 1;
+
 	result = nvme_set_queue_count(&dev->ctrl, &nr_io_queues);
 	if (result < 0)
 		return result;
@@ -2300,6 +2317,7 @@ static int nvme_pci_enable(struct nvme_dev *dev)
 {
 	int result = -ENOMEM;
 	struct pci_dev *pdev = to_pci_dev(dev->dev);
+	unsigned int mqes;
 
 	if (pci_enable_device_mem(pdev))
 		return result;
@@ -2325,8 +2343,8 @@ static int nvme_pci_enable(struct nvme_dev *dev)
 
 	dev->ctrl.cap = lo_hi_readq(dev->bar + NVME_REG_CAP);
 
-	dev->q_depth = min_t(int, NVME_CAP_MQES(dev->ctrl.cap) + 1,
-				io_queue_depth);
+	mqes = NVME_CAP_MQES(dev->ctrl.cap);
+	dev->q_depth = min_t(int, mqes + 1, io_queue_depth);
 	dev->db_stride = 1 << NVME_CAP_STRIDE(dev->ctrl.cap);
 	dev->dbs = dev->bar + 4096;
 
@@ -2340,6 +2358,23 @@ static int nvme_pci_enable(struct nvme_dev *dev)
 	else
 		dev->io_sqes = NVME_NVM_IOSQES;
 
+	/*
+	 * Another Apple one: If we're going to offset the IO queue tags,
+	 * we still want to make sure they are no bigger than MQES,
+	 * it *looks* like otherwise, bad things happen (I suspect some
+	 * of the tag tracking in that device is limited).
+	 */
+	if (dev->ctrl.quirks & NVME_QUIRK_SHARED_TAGS) {
+		if (mqes <= NVME_AQ_DEPTH) {
+			dev_err(dev->ctrl.device, "Apple shared tags quirk"
+				" not compatible with device mqes %d\n", mqes);
+			result = -ENODEV;
+			goto disable;
+		}
+		dev->q_depth = min_t(int, dev->q_depth,
+				     mqes - NVME_AQ_DEPTH + 1);
+	}
+
 	/*
 	 * Temporary fix for the Apple controller found in the MacBook8,1 and
 	 * some MacBook7,1 to avoid controller resets and data loss.
@@ -3057,7 +3092,8 @@ static const struct pci_device_id nvme_id_table[] = {
 	{ PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2003) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2005),
 		.driver_data = NVME_QUIRK_SINGLE_VECTOR |
-				NVME_QUIRK_128_BYTES_SQES },
+				NVME_QUIRK_128_BYTES_SQES |
+				NVME_QUIRK_SHARED_TAGS },
 	{ 0, }
 };
 MODULE_DEVICE_TABLE(pci, nvme_id_table);



             reply	other threads:[~2019-07-19  4:36 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-07-19  4:36 Benjamin Herrenschmidt [this message]
2019-07-19  4:43 ` [PATCH v2] nvme-pci: Support shared tags across queues for Apple 2018 controllers Damien Le Moal
2019-07-19  4:49   ` Benjamin Herrenschmidt
2019-07-19  5:00     ` Benjamin Herrenschmidt
2019-07-19  5:01     ` Damien Le Moal
2019-07-19  5:15       ` Benjamin Herrenschmidt

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=f19ac710b4dc28fb3b59ef11bd06d341bc939f3d.camel@kernel.crashing.org \
    --to=benh@kernel.crashing.org \
    --cc=axboe@fb.com \
    --cc=hch@lst.de \
    --cc=kbusch@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-nvme@lists.infradead.org \
    --cc=minwoo.im.dev@gmail.com \
    --cc=paul@mrarm.io \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).