IOMMU Archive on lore.kernel.org
 help / color / Atom feed
From: Logan Gunthorpe <logang@deltatee.com>
To: linux-kernel@vger.kernel.org, linux-ntb@googlegroups.com,
	linux-pci@vger.kernel.org, iommu@lists.linux-foundation.org,
	linux-kselftest@vger.kernel.org, Jon Mason <jdmason@kudzu.us>,
	Joerg Roedel <joro@8bytes.org>
Cc: Allen Hubbe <allenbh@gmail.com>,
	Dave Jiang <dave.jiang@intel.com>,
	Serge Semin <fancer.lancer@gmail.com>,
	Eric Pilmore <epilmore@gigaio.com>,
	Bjorn Helgaas <bhelgaas@google.com>,
	Logan Gunthorpe <logang@deltatee.com>
Subject: [PATCH v4 09/10] NTB: Add MSI interrupt support to ntb_transport
Date: Mon, 22 Apr 2019 15:05:27 -0600
Message-ID: <20190422210528.15289-10-logang@deltatee.com> (raw)
Message-ID: <20190422210527.TMuvHvLJw92PEmjYq9UUprTkgFRDI45fIhKwJhkoTlY@z> (raw)
In-Reply-To: <20190422210528.15289-1-logang@deltatee.com>

Introduce the module parameter 'use_msi' which, when set, uses
MSI interrupts instead of doorbells for each queue pair (QP). The
parameter is only available if NTB MSI support is configured into
the kernel. We also require there to be more than one memory window
(MW) so that an extra one is available to forward the APIC region.

To use MSIs, we request one interrupt per QP and forward the MSI address
and data to the peer using scratch pad registers (SPADS) above the MW
SPADS. (If there are not enough SPADS the MSI interrupt will not be used.)

Once registered, we simply use ntb_msi_peer_trigger and the receiving
ISR simply queues up the rxc_db_work for the queue.

This addition can significantly improve performance of ntb_transport.
In a simple, untuned, apples-to-apples comparision using ntb_netdev
and iperf with switchtec hardware, I see 3.88Gb/s without MSI
interrupts and 14.1Gb/s wit MSI, which is a more than 3x improvement.

Signed-off-by: Logan Gunthorpe <logang@deltatee.com>
Cc: Jon Mason <jdmason@kudzu.us>
Cc: Dave Jiang <dave.jiang@intel.com>
Cc: Allen Hubbe <allenbh@gmail.com>
---
 drivers/ntb/ntb_transport.c | 169 +++++++++++++++++++++++++++++++++++-
 1 file changed, 168 insertions(+), 1 deletion(-)

diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport.c
index d4f39ba1d976..f1cf0942cb99 100644
--- a/drivers/ntb/ntb_transport.c
+++ b/drivers/ntb/ntb_transport.c
@@ -93,6 +93,12 @@ static bool use_dma;
 module_param(use_dma, bool, 0644);
 MODULE_PARM_DESC(use_dma, "Use DMA engine to perform large data copy");
 
+static bool use_msi;
+#ifdef CONFIG_NTB_MSI
+module_param(use_msi, bool, 0644);
+MODULE_PARM_DESC(use_msi, "Use MSI interrupts instead of doorbells");
+#endif
+
 static struct dentry *nt_debugfs_dir;
 
 /* Only two-ports NTB devices are supported */
@@ -188,6 +194,11 @@ struct ntb_transport_qp {
 	u64 tx_err_no_buf;
 	u64 tx_memcpy;
 	u64 tx_async;
+
+	bool use_msi;
+	int msi_irq;
+	struct ntb_msi_desc msi_desc;
+	struct ntb_msi_desc peer_msi_desc;
 };
 
 struct ntb_transport_mw {
@@ -221,6 +232,10 @@ struct ntb_transport_ctx {
 	u64 qp_bitmap;
 	u64 qp_bitmap_free;
 
+	bool use_msi;
+	unsigned int msi_spad_offset;
+	u64 msi_db_mask;
+
 	bool link_is_up;
 	struct delayed_work link_work;
 	struct work_struct link_cleanup;
@@ -667,6 +682,114 @@ static int ntb_transport_setup_qp_mw(struct ntb_transport_ctx *nt,
 	return 0;
 }
 
+static irqreturn_t ntb_transport_isr(int irq, void *dev)
+{
+	struct ntb_transport_qp *qp = dev;
+
+	tasklet_schedule(&qp->rxc_db_work);
+
+	return IRQ_HANDLED;
+}
+
+static void ntb_transport_setup_qp_peer_msi(struct ntb_transport_ctx *nt,
+					    unsigned int qp_num)
+{
+	struct ntb_transport_qp *qp = &nt->qp_vec[qp_num];
+	int spad = qp_num * 2 + nt->msi_spad_offset;
+
+	if (!nt->use_msi)
+		return;
+
+	if (spad >= ntb_spad_count(nt->ndev))
+		return;
+
+	qp->peer_msi_desc.addr_offset =
+		ntb_peer_spad_read(qp->ndev, PIDX, spad);
+	qp->peer_msi_desc.data =
+		ntb_peer_spad_read(qp->ndev, PIDX, spad + 1);
+
+	dev_dbg(&qp->ndev->pdev->dev, "QP%d Peer MSI addr=%x data=%x\n",
+		qp_num, qp->peer_msi_desc.addr_offset, qp->peer_msi_desc.data);
+
+	if (qp->peer_msi_desc.addr_offset) {
+		qp->use_msi = true;
+		dev_info(&qp->ndev->pdev->dev,
+			 "Using MSI interrupts for QP%d\n", qp_num);
+	}
+}
+
+static void ntb_transport_setup_qp_msi(struct ntb_transport_ctx *nt,
+				       unsigned int qp_num)
+{
+	struct ntb_transport_qp *qp = &nt->qp_vec[qp_num];
+	int spad = qp_num * 2 + nt->msi_spad_offset;
+	int rc;
+
+	if (!nt->use_msi)
+		return;
+
+	if (spad >= ntb_spad_count(nt->ndev)) {
+		dev_warn_once(&qp->ndev->pdev->dev,
+			      "Not enough SPADS to use MSI interrupts\n");
+		return;
+	}
+
+	ntb_spad_write(qp->ndev, spad, 0);
+	ntb_spad_write(qp->ndev, spad + 1, 0);
+
+	if (!qp->msi_irq) {
+		qp->msi_irq = ntbm_msi_request_irq(qp->ndev, ntb_transport_isr,
+						   KBUILD_MODNAME, qp,
+						   &qp->msi_desc);
+		if (qp->msi_irq < 0) {
+			dev_warn(&qp->ndev->pdev->dev,
+				 "Unable to allocate MSI interrupt for qp%d\n",
+				 qp_num);
+			return;
+		}
+	}
+
+	rc = ntb_spad_write(qp->ndev, spad, qp->msi_desc.addr_offset);
+	if (rc)
+		goto err_free_interrupt;
+
+	rc = ntb_spad_write(qp->ndev, spad + 1, qp->msi_desc.data);
+	if (rc)
+		goto err_free_interrupt;
+
+	dev_dbg(&qp->ndev->pdev->dev, "QP%d MSI %d addr=%x data=%x\n",
+		qp_num, qp->msi_irq, qp->msi_desc.addr_offset,
+		qp->msi_desc.data);
+
+	return;
+
+err_free_interrupt:
+	devm_free_irq(&nt->ndev->dev, qp->msi_irq, qp);
+}
+
+static void ntb_transport_msi_peer_desc_changed(struct ntb_transport_ctx *nt)
+{
+	int i;
+
+	dev_dbg(&nt->ndev->pdev->dev, "Peer MSI descriptors changed");
+
+	for (i = 0; i < nt->qp_count; i++)
+		ntb_transport_setup_qp_peer_msi(nt, i);
+}
+
+static void ntb_transport_msi_desc_changed(void *data)
+{
+	struct ntb_transport_ctx *nt = data;
+	int i;
+
+	dev_dbg(&nt->ndev->pdev->dev, "MSI descriptors changed");
+
+	for (i = 0; i < nt->qp_count; i++)
+		ntb_transport_setup_qp_msi(nt, i);
+
+	ntb_peer_db_set(nt->ndev, nt->msi_db_mask);
+}
+
 static void ntb_free_mw(struct ntb_transport_ctx *nt, int num_mw)
 {
 	struct ntb_transport_mw *mw = &nt->mw_vec[num_mw];
@@ -905,6 +1028,20 @@ static void ntb_transport_link_work(struct work_struct *work)
 	int rc = 0, i, spad;
 
 	/* send the local info, in the opposite order of the way we read it */
+
+	if (nt->use_msi) {
+		rc = ntb_msi_setup_mws(ndev);
+		if (rc) {
+			dev_warn(&pdev->dev,
+				 "Failed to register MSI memory window: %d\n",
+				 rc);
+			nt->use_msi = false;
+		}
+	}
+
+	for (i = 0; i < nt->qp_count; i++)
+		ntb_transport_setup_qp_msi(nt, i);
+
 	for (i = 0; i < nt->mw_count; i++) {
 		size = nt->mw_vec[i].phys_size;
 
@@ -962,6 +1099,7 @@ static void ntb_transport_link_work(struct work_struct *work)
 		struct ntb_transport_qp *qp = &nt->qp_vec[i];
 
 		ntb_transport_setup_qp_mw(nt, i);
+		ntb_transport_setup_qp_peer_msi(nt, i);
 
 		if (qp->client_ready)
 			schedule_delayed_work(&qp->link_work, 0);
@@ -1135,6 +1273,19 @@ static int ntb_transport_probe(struct ntb_client *self, struct ntb_dev *ndev)
 		return -ENOMEM;
 
 	nt->ndev = ndev;
+
+	/*
+	 * If we are using MSI, and have at least one extra memory window,
+	 * we will reserve the last MW for the MSI window.
+	 */
+	if (use_msi && mw_count > 1) {
+		rc = ntb_msi_init(ndev, ntb_transport_msi_desc_changed);
+		if (!rc) {
+			mw_count -= 1;
+			nt->use_msi = true;
+		}
+	}
+
 	spad_count = ntb_spad_count(ndev);
 
 	/* Limit the MW's based on the availability of scratchpads */
@@ -1148,6 +1299,8 @@ static int ntb_transport_probe(struct ntb_client *self, struct ntb_dev *ndev)
 	max_mw_count_for_spads = (spad_count - MW0_SZ_HIGH) / 2;
 	nt->mw_count = min(mw_count, max_mw_count_for_spads);
 
+	nt->msi_spad_offset = nt->mw_count * 2 + MW0_SZ_HIGH;
+
 	nt->mw_vec = kcalloc_node(mw_count, sizeof(*nt->mw_vec),
 				  GFP_KERNEL, node);
 	if (!nt->mw_vec) {
@@ -1178,6 +1331,12 @@ static int ntb_transport_probe(struct ntb_client *self, struct ntb_dev *ndev)
 	qp_bitmap = ntb_db_valid_mask(ndev);
 
 	qp_count = ilog2(qp_bitmap);
+	if (nt->use_msi) {
+		qp_count -= 1;
+		nt->msi_db_mask = 1 << qp_count;
+		ntb_db_clear_mask(ndev, nt->msi_db_mask);
+	}
+
 	if (max_num_clients && max_num_clients < qp_count)
 		qp_count = max_num_clients;
 	else if (nt->mw_count < qp_count)
@@ -1601,7 +1760,10 @@ static void ntb_tx_copy_callback(void *data,
 
 	iowrite32(entry->flags | DESC_DONE_FLAG, &hdr->flags);
 
-	ntb_peer_db_set(qp->ndev, BIT_ULL(qp->qp_num));
+	if (qp->use_msi)
+		ntb_msi_peer_trigger(qp->ndev, PIDX, &qp->peer_msi_desc);
+	else
+		ntb_peer_db_set(qp->ndev, BIT_ULL(qp->qp_num));
 
 	/* The entry length can only be zero if the packet is intended to be a
 	 * "link down" or similar.  Since no payload is being sent in these
@@ -2268,6 +2430,11 @@ static void ntb_transport_doorbell_callback(void *data, int vector)
 	u64 db_bits;
 	unsigned int qp_num;
 
+	if (ntb_db_read(nt->ndev) & nt->msi_db_mask) {
+		ntb_transport_msi_peer_desc_changed(nt);
+		ntb_db_clear(nt->ndev, nt->msi_db_mask);
+	}
+
 	db_bits = (nt->qp_bitmap & ~nt->qp_bitmap_free &
 		   ntb_db_vector_mask(nt->ndev, vector));
 
-- 
2.20.1

_______________________________________________
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

  parent reply index

Thread overview: 22+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-04-22 21:05 [PATCH v4 00/10] Support using MSI interrupts in ntb_transport Logan Gunthorpe
2019-04-22 21:05 ` Logan Gunthorpe
     [not found] ` <20190422210528.15289-1-logang-OTvnGxWRz7hWk0Htik3J/w@public.gmane.org>
2019-04-22 21:05   ` [PATCH v4 01/10] PCI/MSI: Support allocating virtual MSI interrupts Logan Gunthorpe
2019-04-22 21:05     ` Logan Gunthorpe
2019-04-22 21:05   ` [PATCH v4 02/10] PCI/switchtec: Add module parameter to request more interrupts Logan Gunthorpe
2019-04-22 21:05     ` Logan Gunthorpe
2019-04-22 21:05   ` [PATCH v4 03/10] NTB: Introduce helper functions to calculate logical port number Logan Gunthorpe
2019-04-22 21:05     ` Logan Gunthorpe
2019-04-22 21:05   ` [PATCH v4 04/10] NTB: Introduce functions to calculate multi-port resource index Logan Gunthorpe
2019-04-22 21:05     ` Logan Gunthorpe
2019-04-22 21:05   ` [PATCH v4 08/10] NTB: Add ntb_msi_test support to ntb_test Logan Gunthorpe
2019-04-22 21:05     ` Logan Gunthorpe
2019-04-22 21:05 ` [PATCH v4 05/10] NTB: Rename ntb.c to support multiple source files in the module Logan Gunthorpe
2019-04-22 21:05   ` Logan Gunthorpe
2019-04-22 21:05 ` [PATCH v4 06/10] NTB: Introduce MSI library Logan Gunthorpe
2019-04-22 21:05   ` Logan Gunthorpe
2019-04-22 21:05 ` [PATCH v4 07/10] NTB: Introduce NTB MSI Test Client Logan Gunthorpe
2019-04-22 21:05   ` Logan Gunthorpe
2019-04-22 21:05 ` Logan Gunthorpe [this message]
2019-04-22 21:05   ` [PATCH v4 09/10] NTB: Add MSI interrupt support to ntb_transport Logan Gunthorpe
2019-04-22 21:05 ` [PATCH v4 10/10] NTB: Describe the ntb_msi_test client in the documentation Logan Gunthorpe
2019-04-22 21:05   ` Logan Gunthorpe

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190422210528.15289-10-logang@deltatee.com \
    --to=logang@deltatee.com \
    --cc=allenbh@gmail.com \
    --cc=bhelgaas@google.com \
    --cc=dave.jiang@intel.com \
    --cc=epilmore@gigaio.com \
    --cc=fancer.lancer@gmail.com \
    --cc=iommu@lists.linux-foundation.org \
    --cc=jdmason@kudzu.us \
    --cc=joro@8bytes.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-kselftest@vger.kernel.org \
    --cc=linux-ntb@googlegroups.com \
    --cc=linux-pci@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

IOMMU Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/linux-iommu/0 linux-iommu/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 linux-iommu linux-iommu/ https://lore.kernel.org/linux-iommu \
		iommu@lists.linux-foundation.org
	public-inbox-index linux-iommu

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.linux-foundation.lists.iommu


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git