All of lore.kernel.org
 help / color / mirror / Atom feed
From: Haggai Eran <haggaie-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
To: Roland Dreier <roland-BHEL68pLQRGGvPXPguhicg@public.gmane.org>
Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	Sagi Grimberg <sagig-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>,
	Shachar Raindel <raindel-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>,
	Haggai Eran <haggaie-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
Subject: [PATCH v1 for-next 02/16] IB/core: Add flags for on demand paging support
Date: Thu,  3 Jul 2014 11:44:15 +0300	[thread overview]
Message-ID: <1404377069-20585-3-git-send-email-haggaie@mellanox.com> (raw)
In-Reply-To: <1404377069-20585-1-git-send-email-haggaie-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>

From: Sagi Grimberg <sagig-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>

* Add a configuration option for enable on-demand paging support in the
  infiniband subsystem (CONFIG_INFINIBAND_ON_DEMAND_PAGING). In a later patch,
  this configuration option will select the MMU_NOTIFIER configuration option
  to enable mmu notifiers.
* Add a flag for on demand paging (ODP) support in the IB device capabilities.
* Add a flag to request ODP MR in the access flags to reg_mr.
* Fail registrations done with the ODP flag when the low-level driver doesn't
  support this.
* Change the conditions in which an MR will be writable to explicitly
  specify the access flags. This is to avoid making an MR writable just
  because it is an ODP MR.
* Add a ODP capabilities to the extended query device verb.

Signed-off-by: Sagi Grimberg <sagig-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
Signed-off-by: Shachar Raindel <raindel-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
Signed-off-by: Haggai Eran <haggaie-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
---
 drivers/infiniband/Kconfig           | 10 ++++++++++
 drivers/infiniband/core/umem.c       |  8 +++++---
 drivers/infiniband/core/uverbs_cmd.c | 29 +++++++++++++++++++++++++++++
 include/rdma/ib_verbs.h              | 28 ++++++++++++++++++++++++++--
 include/uapi/rdma/ib_user_verbs.h    | 15 +++++++++++++++
 5 files changed, 85 insertions(+), 5 deletions(-)

diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig
index 7708939..089a2c2 100644
--- a/drivers/infiniband/Kconfig
+++ b/drivers/infiniband/Kconfig
@@ -38,6 +38,16 @@ config INFINIBAND_USER_MEM
 	depends on INFINIBAND_USER_ACCESS != n
 	default y
 
+config INFINIBAND_ON_DEMAND_PAGING
+	bool "InfiniBand on-demand paging support"
+	depends on INFINIBAND_USER_MEM
+	default y
+	---help---
+	  On demand paging support for the InfiniBand subsystem.
+	  Together with driver support this allows registration of
+	  memory regions without pinning their pages, fetching the
+	  pages on demand instead.
+
 config INFINIBAND_ADDR_TRANS
 	bool
 	depends on INFINIBAND
diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index a3a2e9c..1fba9d3 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -106,13 +106,15 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
 	umem->offset    = addr & ~PAGE_MASK;
 	umem->page_size = PAGE_SIZE;
 	/*
-	 * We ask for writable memory if any access flags other than
-	 * "remote read" are set.  "Local write" and "remote write"
+	 * We ask for writable memory if any of the following
+	 * access flags are set.  "Local write" and "remote write"
 	 * obviously require write access.  "Remote atomic" can do
 	 * things like fetch and add, which will modify memory, and
 	 * "MW bind" can change permissions by binding a window.
 	 */
-	umem->writable  = !!(access & ~IB_ACCESS_REMOTE_READ);
+	umem->writable  = !!(access &
+		(IB_ACCESS_LOCAL_WRITE   | IB_ACCESS_REMOTE_WRITE |
+		 IB_ACCESS_REMOTE_ATOMIC | IB_ACCESS_MW_BIND));
 
 	/* We assume the memory is from hugetlb until proved otherwise */
 	umem->hugetlb   = 1;
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index a5fb347..af0c51b 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -955,6 +955,22 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
 		goto err_free;
 	}
 
+
+	if (cmd.access_flags & IB_ACCESS_ON_DEMAND) {
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+		struct ib_device_attr attr;
+		ret = ib_query_device(pd->device, &attr);
+		if (ret || !(attr.device_cap_flags &
+				IB_DEVICE_ON_DEMAND_PAGING)) {
+			ret = -EINVAL;
+			goto err_put;
+		}
+#else
+		ret = -EINVAL;
+		goto err_put;
+#endif
+	}
+
 	mr = pd->device->reg_user_mr(pd, cmd.start, cmd.length, cmd.hca_va,
 				     cmd.access_flags, &udata);
 	if (IS_ERR(mr)) {
@@ -3189,6 +3205,19 @@ int ib_uverbs_ex_query_device(struct ib_uverbs_file *file,
 
 	resp.comp_mask = 0;
 
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+	if (cmd.comp_mask & IB_USER_VERBS_EX_QUERY_DEVICE_ODP) {
+		resp.odp_caps.general_caps = attr.odp_caps.general_caps;
+		resp.odp_caps.per_transport_caps.rc_odp_caps =
+			attr.odp_caps.per_transport_caps.rc_odp_caps;
+		resp.odp_caps.per_transport_caps.uc_odp_caps =
+			attr.odp_caps.per_transport_caps.uc_odp_caps;
+		resp.odp_caps.per_transport_caps.ud_odp_caps =
+			attr.odp_caps.per_transport_caps.ud_odp_caps;
+		resp.comp_mask |= IB_USER_VERBS_EX_QUERY_DEVICE_ODP;
+	}
+#endif
+
 	err = ib_copy_to_udata(ucore,
 			       &resp, sizeof(resp));
 
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 7ccef34..7c5e9e0 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -123,7 +123,8 @@ enum ib_device_cap_flags {
 	IB_DEVICE_MEM_WINDOW_TYPE_2A	= (1<<23),
 	IB_DEVICE_MEM_WINDOW_TYPE_2B	= (1<<24),
 	IB_DEVICE_MANAGED_FLOW_STEERING = (1<<29),
-	IB_DEVICE_SIGNATURE_HANDOVER	= (1<<30)
+	IB_DEVICE_SIGNATURE_HANDOVER	= (1<<30),
+	IB_DEVICE_ON_DEMAND_PAGING	= (1<<31),
 };
 
 enum ib_signature_prot_cap {
@@ -143,6 +144,27 @@ enum ib_atomic_cap {
 	IB_ATOMIC_GLOB
 };
 
+enum ib_odp_general_cap_bits {
+	IB_ODP_SUPPORT = 1 << 0,
+};
+
+enum ib_odp_transport_cap_bits {
+	IB_ODP_SUPPORT_SEND	= 1 << 0,
+	IB_ODP_SUPPORT_RECV	= 1 << 1,
+	IB_ODP_SUPPORT_WRITE	= 1 << 2,
+	IB_ODP_SUPPORT_READ	= 1 << 3,
+	IB_ODP_SUPPORT_ATOMIC	= 1 << 4,
+};
+
+struct ib_odp_caps {
+	uint64_t general_caps;
+	struct {
+		uint32_t  rc_odp_caps;
+		uint32_t  uc_odp_caps;
+		uint32_t  ud_odp_caps;
+	} per_transport_caps;
+};
+
 struct ib_device_attr {
 	u64			fw_ver;
 	__be64			sys_image_guid;
@@ -186,6 +208,7 @@ struct ib_device_attr {
 	u8			local_ca_ack_delay;
 	int			sig_prot_cap;
 	int			sig_guard_cap;
+	struct ib_odp_caps	odp_caps;
 };
 
 enum ib_mtu {
@@ -1077,7 +1100,8 @@ enum ib_access_flags {
 	IB_ACCESS_REMOTE_READ	= (1<<2),
 	IB_ACCESS_REMOTE_ATOMIC	= (1<<3),
 	IB_ACCESS_MW_BIND	= (1<<4),
-	IB_ZERO_BASED		= (1<<5)
+	IB_ZERO_BASED		= (1<<5),
+	IB_ACCESS_ON_DEMAND     = (1<<6),
 };
 
 struct ib_phys_buf {
diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h
index 03052b4..f6a4d84 100644
--- a/include/uapi/rdma/ib_user_verbs.h
+++ b/include/uapi/rdma/ib_user_verbs.h
@@ -799,6 +799,20 @@ struct ib_uverbs_destroy_flow  {
 	__u32 flow_handle;
 };
 
+struct ib_uverbs_odp_caps {
+	__u64 general_caps;
+	struct {
+		__u32 rc_odp_caps;
+		__u32 uc_odp_caps;
+		__u32 ud_odp_caps;
+	} per_transport_caps;
+	__u32 reserved;
+};
+
+enum {
+	IB_USER_VERBS_EX_QUERY_DEVICE_ODP =		1ULL << 0,
+};
+
 struct ib_uverbs_ex_query_device {
 	__u64 comp_mask;
 };
@@ -806,6 +820,7 @@ struct ib_uverbs_ex_query_device {
 struct ib_uverbs_ex_query_device_resp {
 	struct ib_uverbs_query_device_resp base;
 	__u64 comp_mask;
+	struct ib_uverbs_odp_caps odp_caps;
 };
 
 struct ib_uverbs_create_srq {
-- 
1.7.11.2

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

  parent reply	other threads:[~2014-07-03  8:44 UTC|newest]

Thread overview: 35+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-07-03  8:44 [PATCH v1 for-next 00/16] On demand paging Haggai Eran
     [not found] ` <1404377069-20585-1-git-send-email-haggaie-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
2014-07-03  8:44   ` [PATCH v1 for-next 01/16] IB/core: Add an extended user verb to query device attributes Haggai Eran
2014-07-03  8:44   ` Haggai Eran [this message]
2014-07-03  8:44   ` [PATCH v1 for-next 03/16] IB/core: Replace ib_umem's offset field with a full address Haggai Eran
2014-07-03  8:44   ` [PATCH v1 for-next 04/16] IB/core: Add umem function to read data from user-space Haggai Eran
2014-07-03  8:44   ` [PATCH v1 for-next 05/16] IB/core: Add support for on demand paging regions Haggai Eran
2014-07-03  8:44   ` [PATCH v1 for-next 06/16] IB/core: Implement support for MMU notifiers regarding " Haggai Eran
2014-07-03  8:44   ` [PATCH v1 for-next 07/16] IB/mlx5: Enhance UMR support to allow partial page table update Haggai Eran
2014-07-03  8:44   ` [PATCH v1 for-next 08/16] net/mlx5_core: Add support for page faults events and low level handling Haggai Eran
2014-07-03  8:44   ` [PATCH v1 for-next 09/16] IB/mlx5: Implement the ODP capability query verb Haggai Eran
2014-07-03  8:44   ` [PATCH v1 for-next 10/16] IB/mlx5: Changes in memory region creation to support on-demand paging Haggai Eran
2014-07-03  8:44   ` [PATCH v1 for-next 11/16] IB/mlx5: Add mlx5_ib_update_mtt to update page tables after creation Haggai Eran
2014-07-03  8:44   ` [PATCH v1 for-next 12/16] IB/mlx5: Add function to read WQE from user-space Haggai Eran
2014-07-03  8:44   ` [PATCH v1 for-next 13/16] IB/mlx5: Page faults handling infrastructure Haggai Eran
2014-07-03  8:44   ` [PATCH v1 for-next 14/16] IB/mlx5: Handle page faults Haggai Eran
2014-07-03  8:44   ` [PATCH v1 for-next 15/16] IB/mlx5: Add support for RDMA write responder " Haggai Eran
2014-07-03  8:44   ` [PATCH v1 for-next 16/16] IB/mlx5: Implement on demand paging by adding support for MMU notifiers Haggai Eran
2014-08-18  7:10   ` [PATCH v1 for-next 00/16] On demand paging Sagi Grimberg
2014-09-02 14:23   ` Or Gerlitz
     [not found]     ` <5405D2D8.1040700-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
2014-09-03 20:21       ` Or Gerlitz
2014-09-03 20:21         ` Or Gerlitz
     [not found]         ` <CAOha14xthZHSpS_T+XRgZcPqwaZvtMw0iGTzKjTyjdBuLhJ4Eg@mail.gmail.com>
     [not found]           ` <CAOha14xthZHSpS_T+XRgZcPqwaZvtMw0iGTzKjTyjdBuLhJ4Eg-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2014-09-03 21:15             ` Roland Dreier
2014-09-03 21:15               ` Roland Dreier
     [not found]               ` <CAG4TOxO54LJgSjFFDge4X9OpaBNrM1z=iGSV0GEY+0iFFSs9sw-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2014-09-04 17:45                 ` Jerome Glisse
2014-09-04 17:45                   ` Jerome Glisse
2014-09-09 14:21                 ` Haggai Eran
2014-09-09 14:21                   ` Haggai Eran
     [not found]                   ` <540F0CD8.9070002-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
2014-09-10  8:51                     ` Haggai Eran
2014-09-10  8:51                       ` Haggai Eran
     [not found]                       ` <54101123.2080208-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
2014-09-10  9:28                         ` Sagi Grimberg
2014-09-10  9:28                           ` Sagi Grimberg
2014-09-12 21:16                     ` Or Gerlitz
2014-09-12 21:16                       ` Or Gerlitz
     [not found]                       ` <CAJ3xEMhSZcrN0_nurc=-sUT+KM3vFgAD6XJjjA-eK0-Jyeut0Q-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2014-09-17 15:18                         ` Or Gerlitz
2014-09-17 15:18                           ` Or Gerlitz

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1404377069-20585-3-git-send-email-haggaie@mellanox.com \
    --to=haggaie-vpraknaxozvwk0htik3j/w@public.gmane.org \
    --cc=linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org \
    --cc=raindel-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org \
    --cc=roland-BHEL68pLQRGGvPXPguhicg@public.gmane.org \
    --cc=sagig-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.