linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Logan Gunthorpe <logang@deltatee.com>
To: Christoph Hellwig <hch@lst.de>, Sagi Grimberg <sagi@grimberg.me>,
	"James E.J. Bottomley" <jejb@linux.vnet.ibm.com>,
	"Martin K. Petersen" <martin.petersen@oracle.com>,
	Jens Axboe <axboe@kernel.dk>,
	Steve Wise <swise@opengridcomputing.com>,
	Stephen Bates <sbates@raithlin.com>,
	Max Gurtovoy <maxg@mellanox.com>,
	Dan Williams <dan.j.williams@intel.com>,
	Keith Busch <keith.busch@intel.com>,
	Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
Cc: linux-pci@vger.kernel.org, linux-scsi@vger.kernel.org,
	linux-nvme@lists.infradead.org, linux-rdma@vger.kernel.org,
	linux-nvdimm@ml01.01.org, linux-kernel@vger.kernel.org,
	Logan Gunthorpe <logang@deltatee.com>
Subject: [RFC 3/8] nvmet: Use p2pmem in nvme target
Date: Thu, 30 Mar 2017 16:12:34 -0600	[thread overview]
Message-ID: <1490911959-5146-4-git-send-email-logang@deltatee.com> (raw)
In-Reply-To: <1490911959-5146-1-git-send-email-logang@deltatee.com>

We create a configfs attribute in each nvme-fabrics target port to
enable p2p memory use. When enabled, the port will only then use the
p2p memory if a p2p memory device can be found which is behind the
same switch as the RDMA port and all the block devices in use. If
the user enabled it an no devices are found, then the system will
silently fall back on using regular memory.

If appropriate, that port will allocate memory for the RDMA buffers
for queues from the p2pmem device falling back to system memory should
anything fail.

Ideally, we'd want to use an NVME CMB buffer as p2p memory. This would
save an extra PCI transfer as the NVME card could just take the data
out of it's own memory. However, at this time, cards with CMB buffers
don't seem to be available.

Signed-off-by: Logan Gunthorpe <logang@deltatee.com>
Signed-off-by: Stephen Bates <sbates@raithlin.com>
Signed-off-by: Steve Wise <swise@opengridcomputing.com>
---
 drivers/nvme/target/configfs.c | 31 +++++++++++++++
 drivers/nvme/target/nvmet.h    |  1 +
 drivers/nvme/target/rdma.c     | 90 ++++++++++++++++++++++++++++++++++++++----
 3 files changed, 114 insertions(+), 8 deletions(-)

diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c
index be8c800..e61a7f4 100644
--- a/drivers/nvme/target/configfs.c
+++ b/drivers/nvme/target/configfs.c
@@ -777,12 +777,43 @@ static void nvmet_port_release(struct config_item *item)
 	kfree(port);
 }
 
+#ifdef CONFIG_P2PMEM
+static ssize_t nvmet_allow_p2pmem_show(struct config_item *item, char *page)
+{
+	return sprintf(page, "%d\n", to_nvmet_port(item)->allow_p2pmem);
+}
+
+static ssize_t nvmet_allow_p2pmem_store(struct config_item *item,
+					const char *page, size_t count)
+{
+	struct nvmet_port *port = to_nvmet_port(item);
+	bool allow;
+	int ret;
+
+	ret = strtobool(page, &allow);
+	if (ret)
+		return ret;
+
+	down_write(&nvmet_config_sem);
+	port->allow_p2pmem = allow;
+	up_write(&nvmet_config_sem);
+
+	return count;
+}
+CONFIGFS_ATTR(nvmet_, allow_p2pmem);
+#endif
+
 static struct configfs_attribute *nvmet_port_attrs[] = {
 	&nvmet_attr_addr_adrfam,
 	&nvmet_attr_addr_treq,
 	&nvmet_attr_addr_traddr,
 	&nvmet_attr_addr_trsvcid,
 	&nvmet_attr_addr_trtype,
+
+	#ifdef CONFIG_P2PMEM
+	&nvmet_attr_allow_p2pmem,
+	#endif
+
 	NULL,
 };
 
diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
index f7ff15f..ab67175 100644
--- a/drivers/nvme/target/nvmet.h
+++ b/drivers/nvme/target/nvmet.h
@@ -95,6 +95,7 @@ struct nvmet_port {
 	struct list_head		referrals;
 	void				*priv;
 	bool				enabled;
+	bool				allow_p2pmem;
 };
 
 static inline struct nvmet_port *to_nvmet_port(struct config_item *item)
diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c
index ecc4fe8..7fd4840 100644
--- a/drivers/nvme/target/rdma.c
+++ b/drivers/nvme/target/rdma.c
@@ -23,6 +23,7 @@
 #include <linux/string.h>
 #include <linux/wait.h>
 #include <linux/inet.h>
+#include <linux/p2pmem.h>
 #include <asm/unaligned.h>
 
 #include <rdma/ib_verbs.h>
@@ -64,6 +65,7 @@ struct nvmet_rdma_rsp {
 	struct rdma_rw_ctx	rw;
 
 	struct nvmet_req	req;
+	struct p2pmem_dev       *p2pmem;
 
 	u8			n_rdma;
 	u32			flags;
@@ -107,6 +109,8 @@ struct nvmet_rdma_queue {
 	int			send_queue_size;
 
 	struct list_head	queue_list;
+
+	struct p2pmem_dev	*p2pmem;
 };
 
 struct nvmet_rdma_device {
@@ -185,7 +189,8 @@ nvmet_rdma_put_rsp(struct nvmet_rdma_rsp *rsp)
 	spin_unlock_irqrestore(&rsp->queue->rsps_lock, flags);
 }
 
-static void nvmet_rdma_free_sgl(struct scatterlist *sgl, unsigned int nents)
+static void nvmet_rdma_free_sgl(struct scatterlist *sgl, unsigned int nents,
+				struct p2pmem_dev *p2pmem)
 {
 	struct scatterlist *sg;
 	int count;
@@ -193,13 +198,17 @@ static void nvmet_rdma_free_sgl(struct scatterlist *sgl, unsigned int nents)
 	if (!sgl || !nents)
 		return;
 
-	for_each_sg(sgl, sg, nents, count)
-		__free_page(sg_page(sg));
+	for_each_sg(sgl, sg, nents, count) {
+		if (p2pmem)
+			p2pmem_free_page(p2pmem, sg_page(sg));
+		else
+			__free_page(sg_page(sg));
+	}
 	kfree(sgl);
 }
 
 static int nvmet_rdma_alloc_sgl(struct scatterlist **sgl, unsigned int *nents,
-		u32 length)
+		u32 length, struct p2pmem_dev *p2pmem)
 {
 	struct scatterlist *sg;
 	struct page *page;
@@ -216,7 +225,11 @@ static int nvmet_rdma_alloc_sgl(struct scatterlist **sgl, unsigned int *nents,
 	while (length) {
 		u32 page_len = min_t(u32, length, PAGE_SIZE);
 
-		page = alloc_page(GFP_KERNEL);
+		if (p2pmem)
+			page = p2pmem_alloc_page(p2pmem);
+		else
+			page = alloc_page(GFP_KERNEL);
+
 		if (!page)
 			goto out_free_pages;
 
@@ -231,7 +244,10 @@ static int nvmet_rdma_alloc_sgl(struct scatterlist **sgl, unsigned int *nents,
 out_free_pages:
 	while (i > 0) {
 		i--;
-		__free_page(sg_page(&sg[i]));
+		if (p2pmem)
+			p2pmem_free_page(p2pmem, sg_page(&sg[i]));
+		else
+			__free_page(sg_page(&sg[i]));
 	}
 	kfree(sg);
 out:
@@ -484,7 +500,8 @@ static void nvmet_rdma_release_rsp(struct nvmet_rdma_rsp *rsp)
 	}
 
 	if (rsp->req.sg != &rsp->cmd->inline_sg)
-		nvmet_rdma_free_sgl(rsp->req.sg, rsp->req.sg_cnt);
+		nvmet_rdma_free_sgl(rsp->req.sg, rsp->req.sg_cnt,
+				    rsp->p2pmem);
 
 	if (unlikely(!list_empty_careful(&queue->rsp_wr_wait_list)))
 		nvmet_rdma_process_wr_wait_list(queue);
@@ -625,8 +642,16 @@ static u16 nvmet_rdma_map_sgl_keyed(struct nvmet_rdma_rsp *rsp,
 	if (!len)
 		return 0;
 
+	rsp->p2pmem = rsp->queue->p2pmem;
 	status = nvmet_rdma_alloc_sgl(&rsp->req.sg, &rsp->req.sg_cnt,
-			len);
+			len, rsp->p2pmem);
+
+	if (status && rsp->p2pmem) {
+		rsp->p2pmem = NULL;
+		status = nvmet_rdma_alloc_sgl(&rsp->req.sg, &rsp->req.sg_cnt,
+					      len, rsp->p2pmem);
+	}
+
 	if (status)
 		return status;
 
@@ -984,6 +1009,7 @@ static void nvmet_rdma_free_queue(struct nvmet_rdma_queue *queue)
 				!queue->host_qid);
 	}
 	nvmet_rdma_free_rsps(queue);
+	p2pmem_put(queue->p2pmem);
 	ida_simple_remove(&nvmet_rdma_queue_ida, queue->idx);
 	kfree(queue);
 }
@@ -1179,6 +1205,52 @@ static int nvmet_rdma_cm_accept(struct rdma_cm_id *cm_id,
 	return ret;
 }
 
+/*
+ * If allow_p2pmem is set, we will try to use P2P memory for our
+ * sgl lists. This requires the p2pmem device to be compatible with
+ * the backing device for every namespace this device will support.
+ * If not, we fall back on using system memory.
+ */
+static void nvmet_rdma_queue_setup_p2pmem(struct nvmet_rdma_queue *queue)
+{
+	struct device **dma_devs;
+	struct nvmet_ns *ns;
+	int ndevs = 1;
+	int i = 0;
+	struct nvmet_subsys_link *s;
+
+	if (!queue->port->allow_p2pmem)
+		return;
+
+	list_for_each_entry(s, &queue->port->subsystems, entry) {
+		list_for_each_entry_rcu(ns, &s->subsys->namespaces, dev_link) {
+			ndevs++;
+		}
+	}
+
+	dma_devs = kmalloc((ndevs + 1) * sizeof(*dma_devs), GFP_KERNEL);
+	if (!dma_devs)
+		return;
+
+	dma_devs[i++] = &queue->dev->device->dev;
+
+	list_for_each_entry(s, &queue->port->subsystems, entry) {
+		list_for_each_entry_rcu(ns, &s->subsys->namespaces, dev_link) {
+			dma_devs[i++] = disk_to_dev(ns->bdev->bd_disk);
+		}
+	}
+
+	dma_devs[i++] = NULL;
+
+	queue->p2pmem = p2pmem_find_compat(dma_devs);
+
+	if (queue->p2pmem)
+		pr_debug("using %s for rdma nvme target queue",
+			 dev_name(&queue->p2pmem->dev));
+
+	kfree(dma_devs);
+}
+
 static int nvmet_rdma_queue_connect(struct rdma_cm_id *cm_id,
 		struct rdma_cm_event *event)
 {
@@ -1199,6 +1271,8 @@ static int nvmet_rdma_queue_connect(struct rdma_cm_id *cm_id,
 	}
 	queue->port = cm_id->context;
 
+	nvmet_rdma_queue_setup_p2pmem(queue);
+
 	ret = nvmet_rdma_cm_accept(cm_id, queue, &event->param.conn);
 	if (ret)
 		goto release_queue;
-- 
2.1.4

  parent reply	other threads:[~2017-03-30 22:13 UTC|newest]

Thread overview: 81+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-03-30 22:12 [RFC 0/8] Copy Offload with Peer-to-Peer PCI Memory Logan Gunthorpe
2017-03-30 22:12 ` [RFC 1/8] Introduce Peer-to-Peer memory (p2pmem) device Logan Gunthorpe
2017-03-31 18:49   ` Sinan Kaya
2017-03-31 21:23     ` Logan Gunthorpe
2017-03-31 21:38       ` Sinan Kaya
2017-03-31 22:42         ` Logan Gunthorpe
2017-03-31 23:51           ` Sinan Kaya
2017-04-01  1:57             ` Logan Gunthorpe
2017-04-01  2:17               ` okaya
2017-04-01 22:16                 ` Logan Gunthorpe
2017-04-02  2:26                   ` Sinan Kaya
2017-04-02 17:21                     ` Logan Gunthorpe
2017-04-02 21:03                       ` Sinan Kaya
2017-04-03  4:26                         ` Logan Gunthorpe
2017-04-25 11:58                           ` Marta Rybczynska
2017-04-25 16:58                             ` Logan Gunthorpe
2017-03-30 22:12 ` [RFC 2/8] cxgb4: setup pcie memory window 4 and create p2pmem region Logan Gunthorpe
2017-04-04 10:42   ` Sagi Grimberg
2017-04-04 15:56     ` Logan Gunthorpe
2017-04-05 15:41     ` Steve Wise
2017-03-30 22:12 ` Logan Gunthorpe [this message]
2017-04-04 10:40   ` [RFC 3/8] nvmet: Use p2pmem in nvme target Sagi Grimberg
2017-04-04 16:16     ` Logan Gunthorpe
2017-04-06  5:47       ` Sagi Grimberg
2017-04-06 15:52         ` Logan Gunthorpe
2017-03-30 22:12 ` [RFC 4/8] p2pmem: Add debugfs "stats" file Logan Gunthorpe
2017-04-04 10:46   ` Sagi Grimberg
2017-04-04 17:25     ` Logan Gunthorpe
2017-04-05 15:43     ` Steve Wise
2017-03-30 22:12 ` [RFC 5/8] scatterlist: Modify SG copy functions to support io memory Logan Gunthorpe
2017-03-31  7:09   ` Christoph Hellwig
2017-03-31 15:41     ` Logan Gunthorpe
2017-04-03 21:20       ` Logan Gunthorpe
2017-04-03 21:44         ` Dan Williams
2017-04-03 22:10           ` Logan Gunthorpe
2017-04-03 22:47             ` Dan Williams
2017-04-03 23:12               ` Logan Gunthorpe
2017-04-04  0:07                 ` Dan Williams
2017-04-07 17:59                   ` Logan Gunthorpe
2017-03-30 22:12 ` [RFC 6/8] nvmet: Be careful about using iomem accesses when dealing with p2pmem Logan Gunthorpe
2017-04-04 10:59   ` Sagi Grimberg
2017-04-04 15:46     ` Jason Gunthorpe
2017-04-04 17:21       ` Logan Gunthorpe
2017-04-06  5:33       ` Sagi Grimberg
2017-04-06 16:02         ` Logan Gunthorpe
2017-04-06 16:35         ` Jason Gunthorpe
2017-04-07 11:19         ` Stephen  Bates
2017-04-10  8:29           ` Sagi Grimberg
2017-04-10 16:03             ` Logan Gunthorpe
2017-03-30 22:12 ` [RFC 7/8] p2pmem: Support device removal Logan Gunthorpe
2017-03-30 22:12 ` [RFC 8/8] p2pmem: Added char device user interface Logan Gunthorpe
2017-04-12  5:22 ` [RFC 0/8] Copy Offload with Peer-to-Peer PCI Memory Benjamin Herrenschmidt
2017-04-12 17:09   ` Logan Gunthorpe
2017-04-12 21:55     ` Benjamin Herrenschmidt
2017-04-13 21:22       ` Logan Gunthorpe
2017-04-13 22:37         ` Benjamin Herrenschmidt
2017-04-13 23:26         ` Bjorn Helgaas
2017-04-14  4:16           ` Jason Gunthorpe
2017-04-14  4:40             ` Logan Gunthorpe
2017-04-14 11:37               ` Benjamin Herrenschmidt
2017-04-14 11:39                 ` Benjamin Herrenschmidt
2017-04-14 11:37             ` Benjamin Herrenschmidt
2017-04-14 17:30               ` Logan Gunthorpe
2017-04-14 19:04                 ` Bjorn Helgaas
2017-04-14 22:07                   ` Benjamin Herrenschmidt
2017-04-15 17:41                     ` Logan Gunthorpe
2017-04-15 22:09                       ` Dan Williams
2017-04-16  3:01                         ` Benjamin Herrenschmidt
2017-04-16  4:46                           ` Logan Gunthorpe
2017-04-16 15:53                           ` Dan Williams
2017-04-16 16:34                             ` Logan Gunthorpe
2017-04-16 22:31                               ` Benjamin Herrenschmidt
2017-04-24  7:36                                 ` Knut Omang
2017-04-24 16:14                                   ` Logan Gunthorpe
2017-04-25  6:30                                     ` Knut Omang
2017-04-25 17:03                                       ` Logan Gunthorpe
2017-04-25 21:23                                         ` Stephen  Bates
2017-04-25 21:23                                   ` Stephen  Bates
2017-04-16 22:26                             ` Benjamin Herrenschmidt
2017-04-15 22:17                       ` Benjamin Herrenschmidt
2017-04-16  5:36                         ` Logan Gunthorpe

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1490911959-5146-4-git-send-email-logang@deltatee.com \
    --to=logang@deltatee.com \
    --cc=axboe@kernel.dk \
    --cc=dan.j.williams@intel.com \
    --cc=hch@lst.de \
    --cc=jejb@linux.vnet.ibm.com \
    --cc=jgunthorpe@obsidianresearch.com \
    --cc=keith.busch@intel.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-nvdimm@ml01.01.org \
    --cc=linux-nvme@lists.infradead.org \
    --cc=linux-pci@vger.kernel.org \
    --cc=linux-rdma@vger.kernel.org \
    --cc=linux-scsi@vger.kernel.org \
    --cc=martin.petersen@oracle.com \
    --cc=maxg@mellanox.com \
    --cc=sagi@grimberg.me \
    --cc=sbates@raithlin.com \
    --cc=swise@opengridcomputing.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).