All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/4] Current XRC queue
@ 2010-01-25 19:01 Roland Dreier
       [not found] ` <1264446094-4460-1-git-send-email-rolandd-FYB4Gu1CFyUAvxtiuMwx3w@public.gmane.org>
  0 siblings, 1 reply; 15+ messages in thread
From: Roland Dreier @ 2010-01-25 19:01 UTC (permalink / raw)
  To: linux-rdma-u79uwXL29TY76Z2rM5mHXA

Hi everyone,

I finally found time to finish going over the base XRC patches.  I fixed
the "userspace can destroy an XRCD handle while still having QPs/SRQs
referring to it" problem with a reference count, rather than the more
complicated method used by the original patches.

I think the next step would be for someone (probably not me if we want
this to happen soon) to port the rest of the mlx4 XRC patch set on top
of this, ideally trying to factor out any reference-counting-ish code
away from the device-specific driver and into common code.  Then we
should finally be able to merge this and move on to the IBoE changes.

Thanks,
  Roland

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 15+ messages in thread

* [PATCH 1/4] IB/core: XRC base implementation
       [not found] ` <1264446094-4460-1-git-send-email-rolandd-FYB4Gu1CFyUAvxtiuMwx3w@public.gmane.org>
@ 2010-01-25 19:01   ` Roland Dreier
  2010-01-25 19:01   ` [PATCH 2/4] IB/uverbs: Support for XRC Roland Dreier
                     ` (3 subsequent siblings)
  4 siblings, 0 replies; 15+ messages in thread
From: Roland Dreier @ 2010-01-25 19:01 UTC (permalink / raw)
  To: linux-rdma-u79uwXL29TY76Z2rM5mHXA; +Cc: Jack Morgenstein

From: Jack Morgenstein <jackm-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>

Add the core implementation for XRC ("eXtended reliable connected")
transport.  XRC provides better scalability by allowing senders to
specify which shared receive queue (SRQ) should be used to receive a
message, which essentially allows one transport context (QP
connection) to serve multiple destinations (as long as they share an
adapter, of course).

A few new concepts are introduced to support this:

 - A new device capability flag, IB_DEVICE_XRC, which low-level drivers
   set to indicate that a device supports XRC.
 - A new object type: XRC domains (struct ib_xrcd), and new verbs
   ib_alloc_xrcd()/ib_dealloc_xrcd().  XRCDs are used to limit which XRC
   SRQs an incoming message can target.
 - A new QP type, IB_QPT_XRC, which is used to create QPs that use the
   XRC transport.  Creating XRC QPs requires an XRCD to be specified.
 - A new verb, ib_create_xrc_srq(), which is used to create XRC SRQs.
   XRC SRQs have an associated SRQ number (SRQN), which is included in
   incoming messages to target the message to a given SRQ.

Signed-off-by: Jack Morgenstein <jackm-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>
Signed-off-by: Roland Dreier <rolandd-FYB4Gu1CFyUAvxtiuMwx3w@public.gmane.org>
---
 drivers/infiniband/core/verbs.c |  138 +++++++++++++++++++++++++++++++++++++--
 include/rdma/ib_verbs.h         |   60 ++++++++++++++++-
 2 files changed, 190 insertions(+), 8 deletions(-)

diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index a7da9be..b75193c 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -234,6 +234,8 @@ struct ib_srq *ib_create_srq(struct ib_pd *pd,
 		srq->uobject       = NULL;
 		srq->event_handler = srq_init_attr->event_handler;
 		srq->srq_context   = srq_init_attr->srq_context;
+		srq->xrc_cq	   = NULL;
+		srq->xrcd	   = NULL;
 		atomic_inc(&pd->usecnt);
 		atomic_set(&srq->usecnt, 0);
 	}
@@ -242,6 +244,36 @@ struct ib_srq *ib_create_srq(struct ib_pd *pd,
 }
 EXPORT_SYMBOL(ib_create_srq);
 
+struct ib_srq *ib_create_xrc_srq(struct ib_pd *pd,
+				 struct ib_cq *xrc_cq,
+				 struct ib_xrcd *xrcd,
+				 struct ib_srq_init_attr *srq_init_attr)
+{
+	struct ib_srq *srq;
+
+	if (!pd->device->create_xrc_srq)
+		return ERR_PTR(-ENOSYS);
+
+	srq = pd->device->create_xrc_srq(pd, xrc_cq, xrcd, srq_init_attr, NULL);
+
+	if (!IS_ERR(srq)) {
+		srq->device	   = pd->device;
+		srq->pd		   = pd;
+		srq->uobject	   = NULL;
+		srq->event_handler = srq_init_attr->event_handler;
+		srq->srq_context   = srq_init_attr->srq_context;
+		srq->xrc_cq	   = xrc_cq;
+		srq->xrcd	   = xrcd;
+		atomic_inc(&pd->usecnt);
+		atomic_inc(&xrcd->usecnt);
+		atomic_inc(&xrc_cq->usecnt);
+		atomic_set(&srq->usecnt, 0);
+	}
+
+	return srq;
+}
+EXPORT_SYMBOL(ib_create_xrc_srq);
+
 int ib_modify_srq(struct ib_srq *srq,
 		  struct ib_srq_attr *srq_attr,
 		  enum ib_srq_attr_mask srq_attr_mask)
@@ -263,16 +295,25 @@ EXPORT_SYMBOL(ib_query_srq);
 int ib_destroy_srq(struct ib_srq *srq)
 {
 	struct ib_pd *pd;
+	struct ib_cq *xrc_cq;
+	struct ib_xrcd *xrcd;
 	int ret;
 
 	if (atomic_read(&srq->usecnt))
 		return -EBUSY;
 
-	pd = srq->pd;
+	pd     = srq->pd;
+	xrc_cq = srq->xrc_cq;
+	xrcd   = srq->xrcd;
 
 	ret = srq->device->destroy_srq(srq);
-	if (!ret)
+	if (!ret) {
 		atomic_dec(&pd->usecnt);
+		if (xrc_cq)
+			atomic_dec(&xrc_cq->usecnt);
+		if (xrcd)
+			atomic_dec(&xrcd->usecnt);
+	}
 
 	return ret;
 }
@@ -297,11 +338,17 @@ struct ib_qp *ib_create_qp(struct ib_pd *pd,
 		qp->event_handler = qp_init_attr->event_handler;
 		qp->qp_context    = qp_init_attr->qp_context;
 		qp->qp_type	  = qp_init_attr->qp_type;
+		if (qp->qp_type == IB_QPT_XRC)
+			qp->xrcd  = qp_init_attr->xrcd;
+		else
+			qp->xrcd  = NULL;
 		atomic_inc(&pd->usecnt);
 		atomic_inc(&qp_init_attr->send_cq->usecnt);
 		atomic_inc(&qp_init_attr->recv_cq->usecnt);
 		if (qp_init_attr->srq)
 			atomic_inc(&qp_init_attr->srq->usecnt);
+		if (qp->xrcd)
+			atomic_inc(&qp->xrcd->usecnt);
 	}
 
 	return qp;
@@ -327,6 +374,9 @@ static const struct {
 				[IB_QPT_RC]  = (IB_QP_PKEY_INDEX		|
 						IB_QP_PORT			|
 						IB_QP_ACCESS_FLAGS),
+				[IB_QPT_XRC] = (IB_QP_PKEY_INDEX		|
+						IB_QP_PORT			|
+						IB_QP_ACCESS_FLAGS),
 				[IB_QPT_SMI] = (IB_QP_PKEY_INDEX		|
 						IB_QP_QKEY),
 				[IB_QPT_GSI] = (IB_QP_PKEY_INDEX		|
@@ -349,6 +399,9 @@ static const struct {
 				[IB_QPT_RC]  = (IB_QP_PKEY_INDEX		|
 						IB_QP_PORT			|
 						IB_QP_ACCESS_FLAGS),
+				[IB_QPT_XRC] = (IB_QP_PKEY_INDEX		|
+						IB_QP_PORT			|
+						IB_QP_ACCESS_FLAGS),
 				[IB_QPT_SMI] = (IB_QP_PKEY_INDEX		|
 						IB_QP_QKEY),
 				[IB_QPT_GSI] = (IB_QP_PKEY_INDEX		|
@@ -368,6 +421,12 @@ static const struct {
 						IB_QP_RQ_PSN			|
 						IB_QP_MAX_DEST_RD_ATOMIC	|
 						IB_QP_MIN_RNR_TIMER),
+				[IB_QPT_XRC] = (IB_QP_AV			|
+						IB_QP_PATH_MTU			|
+						IB_QP_DEST_QPN			|
+						IB_QP_RQ_PSN			|
+						IB_QP_MAX_DEST_RD_ATOMIC	|
+						IB_QP_MIN_RNR_TIMER),
 			},
 			.opt_param = {
 				 [IB_QPT_UD]  = (IB_QP_PKEY_INDEX		|
@@ -378,6 +437,9 @@ static const struct {
 				 [IB_QPT_RC]  = (IB_QP_ALT_PATH			|
 						 IB_QP_ACCESS_FLAGS		|
 						 IB_QP_PKEY_INDEX),
+				 [IB_QPT_XRC] = (IB_QP_ALT_PATH			|
+						IB_QP_ACCESS_FLAGS		|
+						IB_QP_PKEY_INDEX),
 				 [IB_QPT_SMI] = (IB_QP_PKEY_INDEX		|
 						 IB_QP_QKEY),
 				 [IB_QPT_GSI] = (IB_QP_PKEY_INDEX		|
@@ -398,6 +460,11 @@ static const struct {
 						IB_QP_RNR_RETRY			|
 						IB_QP_SQ_PSN			|
 						IB_QP_MAX_QP_RD_ATOMIC),
+				[IB_QPT_XRC] = (IB_QP_TIMEOUT			|
+						IB_QP_RETRY_CNT			|
+						IB_QP_RNR_RETRY			|
+						IB_QP_SQ_PSN			|
+						IB_QP_MAX_QP_RD_ATOMIC),
 				[IB_QPT_SMI] = IB_QP_SQ_PSN,
 				[IB_QPT_GSI] = IB_QP_SQ_PSN,
 			},
@@ -413,6 +480,11 @@ static const struct {
 						 IB_QP_ACCESS_FLAGS		|
 						 IB_QP_MIN_RNR_TIMER		|
 						 IB_QP_PATH_MIG_STATE),
+				 [IB_QPT_XRC] = (IB_QP_CUR_STATE		|
+						IB_QP_ALT_PATH			|
+						IB_QP_ACCESS_FLAGS		|
+						IB_QP_MIN_RNR_TIMER		|
+						IB_QP_PATH_MIG_STATE),
 				 [IB_QPT_SMI] = (IB_QP_CUR_STATE		|
 						 IB_QP_QKEY),
 				 [IB_QPT_GSI] = (IB_QP_CUR_STATE		|
@@ -437,6 +509,11 @@ static const struct {
 						IB_QP_ALT_PATH			|
 						IB_QP_PATH_MIG_STATE		|
 						IB_QP_MIN_RNR_TIMER),
+				[IB_QPT_XRC] = (IB_QP_CUR_STATE			|
+						IB_QP_ACCESS_FLAGS		|
+						IB_QP_ALT_PATH			|
+						IB_QP_PATH_MIG_STATE		|
+						IB_QP_MIN_RNR_TIMER),
 				[IB_QPT_SMI] = (IB_QP_CUR_STATE			|
 						IB_QP_QKEY),
 				[IB_QPT_GSI] = (IB_QP_CUR_STATE			|
@@ -449,6 +526,7 @@ static const struct {
 				[IB_QPT_UD]  = IB_QP_EN_SQD_ASYNC_NOTIFY,
 				[IB_QPT_UC]  = IB_QP_EN_SQD_ASYNC_NOTIFY,
 				[IB_QPT_RC]  = IB_QP_EN_SQD_ASYNC_NOTIFY,
+				[IB_QPT_XRC] = IB_QP_EN_SQD_ASYNC_NOTIFY,
 				[IB_QPT_SMI] = IB_QP_EN_SQD_ASYNC_NOTIFY,
 				[IB_QPT_GSI] = IB_QP_EN_SQD_ASYNC_NOTIFY
 			}
@@ -471,6 +549,11 @@ static const struct {
 						IB_QP_ACCESS_FLAGS		|
 						IB_QP_MIN_RNR_TIMER		|
 						IB_QP_PATH_MIG_STATE),
+				[IB_QPT_XRC] = (IB_QP_CUR_STATE			|
+						IB_QP_ALT_PATH			|
+						IB_QP_ACCESS_FLAGS		|
+						IB_QP_MIN_RNR_TIMER		|
+						IB_QP_PATH_MIG_STATE),
 				[IB_QPT_SMI] = (IB_QP_CUR_STATE			|
 						IB_QP_QKEY),
 				[IB_QPT_GSI] = (IB_QP_CUR_STATE			|
@@ -499,6 +582,18 @@ static const struct {
 						IB_QP_PKEY_INDEX		|
 						IB_QP_MIN_RNR_TIMER		|
 						IB_QP_PATH_MIG_STATE),
+				[IB_QPT_XRC] = (IB_QP_PORT			|
+						IB_QP_AV			|
+						IB_QP_TIMEOUT			|
+						IB_QP_RETRY_CNT			|
+						IB_QP_RNR_RETRY			|
+						IB_QP_MAX_QP_RD_ATOMIC		|
+						IB_QP_MAX_DEST_RD_ATOMIC	|
+						IB_QP_ALT_PATH			|
+						IB_QP_ACCESS_FLAGS		|
+						IB_QP_PKEY_INDEX		|
+						IB_QP_MIN_RNR_TIMER		|
+						IB_QP_PATH_MIG_STATE),
 				[IB_QPT_SMI] = (IB_QP_PKEY_INDEX		|
 						IB_QP_QKEY),
 				[IB_QPT_GSI] = (IB_QP_PKEY_INDEX		|
@@ -583,12 +678,14 @@ int ib_destroy_qp(struct ib_qp *qp)
 	struct ib_pd *pd;
 	struct ib_cq *scq, *rcq;
 	struct ib_srq *srq;
+	struct ib_xrcd *xrcd;
 	int ret;
 
-	pd  = qp->pd;
-	scq = qp->send_cq;
-	rcq = qp->recv_cq;
-	srq = qp->srq;
+	pd   = qp->pd;
+	scq  = qp->send_cq;
+	rcq  = qp->recv_cq;
+	srq  = qp->srq;
+	xrcd = qp->xrcd;
 
 	ret = qp->device->destroy_qp(qp);
 	if (!ret) {
@@ -597,6 +694,8 @@ int ib_destroy_qp(struct ib_qp *qp)
 		atomic_dec(&rcq->usecnt);
 		if (srq)
 			atomic_dec(&srq->usecnt);
+		if (xrcd)
+			atomic_dec(&xrcd->usecnt);
 	}
 
 	return ret;
@@ -904,3 +1003,30 @@ int ib_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid)
 	return qp->device->detach_mcast(qp, gid, lid);
 }
 EXPORT_SYMBOL(ib_detach_mcast);
+
+struct ib_xrcd *ib_alloc_xrcd(struct ib_device *device)
+{
+	struct ib_xrcd *xrcd;
+
+	if (!device->alloc_xrcd)
+		return ERR_PTR(-ENOSYS);
+
+	xrcd = device->alloc_xrcd(device, NULL, NULL);
+	if (!IS_ERR(xrcd)) {
+		xrcd->device  = device;
+		xrcd->uobject = NULL;
+		atomic_set(&xrcd->usecnt, 0);
+	}
+
+	return xrcd;
+}
+EXPORT_SYMBOL(ib_alloc_xrcd);
+
+int ib_dealloc_xrcd(struct ib_xrcd *xrcd)
+{
+	if (atomic_read(&xrcd->usecnt))
+		return -EBUSY;
+
+	return xrcd->device->dealloc_xrcd(xrcd);
+}
+EXPORT_SYMBOL(ib_dealloc_xrcd);
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 09509ed..1d843c3 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -103,6 +103,7 @@ enum ib_device_cap_flags {
 	 */
 	IB_DEVICE_UD_IP_CSUM		= (1<<18),
 	IB_DEVICE_UD_TSO		= (1<<19),
+	IB_DEVICE_XRC			= (1<<20),
 	IB_DEVICE_MEM_MGT_EXTENSIONS	= (1<<21),
 	IB_DEVICE_BLOCK_MULTICAST_LOOPBACK = (1<<22),
 };
@@ -551,6 +552,7 @@ enum ib_qp_type {
 	IB_QPT_RC,
 	IB_QPT_UC,
 	IB_QPT_UD,
+	IB_QPT_XRC,
 	IB_QPT_RAW_IPV6,
 	IB_QPT_RAW_ETY
 };
@@ -566,6 +568,7 @@ struct ib_qp_init_attr {
 	struct ib_cq	       *send_cq;
 	struct ib_cq	       *recv_cq;
 	struct ib_srq	       *srq;
+	struct ib_xrcd	       *xrcd;	  /* XRC QPs only */
 	struct ib_qp_cap	cap;
 	enum ib_sig_type	sq_sig_type;
 	enum ib_qp_type		qp_type;
@@ -753,6 +756,7 @@ struct ib_send_wr {
 			u32				rkey;
 		} fast_reg;
 	} wr;
+	u32			xrc_remote_srq_num; /* valid for XRC sends only */
 };
 
 struct ib_recv_wr {
@@ -814,6 +818,7 @@ struct ib_ucontext {
 	struct list_head	qp_list;
 	struct list_head	srq_list;
 	struct list_head	ah_list;
+	struct list_head	xrcd_list;
 	int			closing;
 };
 
@@ -841,6 +846,12 @@ struct ib_pd {
 	atomic_t          	usecnt; /* count all resources */
 };
 
+struct ib_xrcd {
+	struct ib_device       *device;
+	struct ib_uobject      *uobject;
+	atomic_t		usecnt; /* count all resources */
+};
+
 struct ib_ah {
 	struct ib_device	*device;
 	struct ib_pd		*pd;
@@ -862,10 +873,13 @@ struct ib_cq {
 struct ib_srq {
 	struct ib_device       *device;
 	struct ib_pd	       *pd;
+	struct ib_cq	       *xrc_cq;
+	struct ib_xrcd	       *xrcd;
 	struct ib_uobject      *uobject;
 	void		      (*event_handler)(struct ib_event *, void *);
 	void		       *srq_context;
 	atomic_t		usecnt;
+	u32			xrc_srq_num;
 };
 
 struct ib_qp {
@@ -874,6 +888,7 @@ struct ib_qp {
 	struct ib_cq	       *send_cq;
 	struct ib_cq	       *recv_cq;
 	struct ib_srq	       *srq;
+	struct ib_xrcd	       *xrcd;  /* XRC QPs only */
 	struct ib_uobject      *uobject;
 	void                  (*event_handler)(struct ib_event *, void *);
 	void		       *qp_context;
@@ -1130,6 +1145,15 @@ struct ib_device {
 						  struct ib_grh *in_grh,
 						  struct ib_mad *in_mad,
 						  struct ib_mad *out_mad);
+	struct ib_srq *		   (*create_xrc_srq)(struct ib_pd *pd,
+						     struct ib_cq *xrc_cq,
+						     struct ib_xrcd *xrcd,
+						     struct ib_srq_init_attr *srq_init_attr,
+						     struct ib_udata *udata);
+	struct ib_xrcd *	   (*alloc_xrcd)(struct ib_device *device,
+						 struct ib_ucontext *context,
+						 struct ib_udata *udata);
+	int			   (*dealloc_xrcd)(struct ib_xrcd *xrcd);
 
 	struct ib_dma_mapping_ops   *dma_ops;
 
@@ -1312,8 +1336,28 @@ int ib_query_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr);
 int ib_destroy_ah(struct ib_ah *ah);
 
 /**
- * ib_create_srq - Creates a SRQ associated with the specified protection
- *   domain.
+ * ib_create_xrc_srq - Creates an XRC SRQ associated with the specified
+ *   protection domain, completion queue, and XRC domain.
+ * @pd: The protection domain associated with the SRQ.
+ * @xrc_cq: The CQ to be associated with the XRC SRQ.
+ * @xrcd: The XRC domain to be associated with the XRC SRQ.
+ * @srq_init_attr: A list of initial attributes required to create the
+ *   XRC SRQ.  If XRC SRQ creation succeeds, then the attributes are
+ *   updated to the actual capabilities of the created XRC SRQ.
+ *
+ * srq_attr->max_wr and srq_attr->max_sge are read to determine the
+ * requested size of the XRC SRQ, and set to the actual values allocated
+ * on return.  If ib_create_xrc_srq() succeeds, then max_wr and max_sge
+ * will always be at least as large as the requested values.
+ */
+struct ib_srq *ib_create_xrc_srq(struct ib_pd *pd,
+				 struct ib_cq *xrc_cq,
+				 struct ib_xrcd *xrcd,
+				 struct ib_srq_init_attr *srq_init_attr);
+
+/**
+ * ib_create_srq - Creates an SRQ associated with the specified
+ *   protection domain.
  * @pd: The protection domain associated with the SRQ.
  * @srq_init_attr: A list of initial attributes required to create the
  *   SRQ.  If SRQ creation succeeds, then the attributes are updated to
@@ -2036,4 +2080,16 @@ int ib_attach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid);
  */
 int ib_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid);
 
+/**
+ * ib_alloc_xrcd - Allocates an XRC domain.
+ * @device: The device on which to allocate the XRC domain.
+ */
+struct ib_xrcd *ib_alloc_xrcd(struct ib_device *device);
+
+/**
+ * ib_dealloc_xrcd - Deallocates an XRC domain.
+ * @xrcd: The XRC domain to deallocate.
+ */
+int ib_dealloc_xrcd(struct ib_xrcd *xrcd);
+
 #endif /* IB_VERBS_H */
-- 
1.6.6.1

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH 2/4] IB/uverbs: Support for XRC
       [not found] ` <1264446094-4460-1-git-send-email-rolandd-FYB4Gu1CFyUAvxtiuMwx3w@public.gmane.org>
  2010-01-25 19:01   ` [PATCH 1/4] IB/core: XRC base implementation Roland Dreier
@ 2010-01-25 19:01   ` Roland Dreier
       [not found]     ` <1264446094-4460-3-git-send-email-rolandd-FYB4Gu1CFyUAvxtiuMwx3w@public.gmane.org>
  2010-01-25 19:01   ` [PATCH 3/4] IB/uverbs: Add struct ib_usrq_object and ib_uxrcd_object Roland Dreier
                     ` (2 subsequent siblings)
  4 siblings, 1 reply; 15+ messages in thread
From: Roland Dreier @ 2010-01-25 19:01 UTC (permalink / raw)
  To: linux-rdma-u79uwXL29TY76Z2rM5mHXA; +Cc: Jack Morgenstein

From: Jack Morgenstein <jackm-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>

Add support for core userspace XRC operations (alloc/dealloc XRC
domain, create XRC SRQ), including adding an ABI for marshalling
requests and responses.

Signed-off-by: Jack Morgenstein <jackm-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>
Signed-off-by: Roland Dreier <rolandd-FYB4Gu1CFyUAvxtiuMwx3w@public.gmane.org>
---
 drivers/infiniband/core/uverbs.h      |    4 +
 drivers/infiniband/core/uverbs_cmd.c  |  274 ++++++++++++++++++++++++++++++++-
 drivers/infiniband/core/uverbs_main.c |   35 +++--
 include/rdma/ib_user_verbs.h          |   35 ++++-
 4 files changed, 334 insertions(+), 14 deletions(-)

diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h
index b3ea958..f9c051e 100644
--- a/drivers/infiniband/core/uverbs.h
+++ b/drivers/infiniband/core/uverbs.h
@@ -141,6 +141,7 @@ extern struct idr ib_uverbs_ah_idr;
 extern struct idr ib_uverbs_cq_idr;
 extern struct idr ib_uverbs_qp_idr;
 extern struct idr ib_uverbs_srq_idr;
+extern struct idr ib_uverbs_xrcd_idr;
 
 void idr_remove_uobj(struct idr *idp, struct ib_uobject *uobj);
 
@@ -194,5 +195,8 @@ IB_UVERBS_DECLARE_CMD(create_srq);
 IB_UVERBS_DECLARE_CMD(modify_srq);
 IB_UVERBS_DECLARE_CMD(query_srq);
 IB_UVERBS_DECLARE_CMD(destroy_srq);
+IB_UVERBS_DECLARE_CMD(create_xrc_srq);
+IB_UVERBS_DECLARE_CMD(open_xrcd);
+IB_UVERBS_DECLARE_CMD(close_xrcd);
 
 #endif /* UVERBS_H */
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 112d397..92f9f11 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -46,6 +46,7 @@ static struct lock_class_key cq_lock_key;
 static struct lock_class_key qp_lock_key;
 static struct lock_class_key ah_lock_key;
 static struct lock_class_key srq_lock_key;
+static struct lock_class_key xrcd_lock_key;
 
 #define INIT_UDATA(udata, ibuf, obuf, ilen, olen)			\
 	do {								\
@@ -254,6 +255,17 @@ static void put_srq_read(struct ib_srq *srq)
 	put_uobj_read(srq->uobject);
 }
 
+static struct ib_xrcd *idr_read_xrcd(int xrcd_handle,
+				     struct ib_ucontext *context)
+{
+	return idr_read_obj(&ib_uverbs_xrcd_idr, xrcd_handle, context, 0);
+}
+
+static void put_xrcd_read(struct ib_xrcd *xrcd)
+{
+	put_uobj_read(xrcd->uobject);
+}
+
 ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
 			      const char __user *buf,
 			      int in_len, int out_len)
@@ -297,6 +309,7 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
 	INIT_LIST_HEAD(&ucontext->qp_list);
 	INIT_LIST_HEAD(&ucontext->srq_list);
 	INIT_LIST_HEAD(&ucontext->ah_list);
+	INIT_LIST_HEAD(&ucontext->xrcd_list);
 	ucontext->closing = 0;
 
 	resp.num_comp_vectors = file->device->num_comp_vectors;
@@ -1026,6 +1039,7 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
 	struct ib_srq                  *srq;
 	struct ib_qp                   *qp;
 	struct ib_qp_init_attr          attr;
+	struct ib_xrcd		       *xrcd;
 	int ret;
 
 	if (out_len < sizeof resp)
@@ -1045,13 +1059,17 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
 	init_uobj(&obj->uevent.uobject, cmd.user_handle, file->ucontext, &qp_lock_key);
 	down_write(&obj->uevent.uobject.mutex);
 
-	srq = cmd.is_srq ? idr_read_srq(cmd.srq_handle, file->ucontext) : NULL;
+	srq = (cmd.is_srq && cmd.qp_type != IB_QPT_XRC) ?
+		idr_read_srq(cmd.srq_handle, file->ucontext) : NULL;
+	xrcd = cmd.qp_type == IB_QPT_XRC ?
+		idr_read_xrcd(cmd.srq_handle, file->ucontext) : NULL;
 	pd  = idr_read_pd(cmd.pd_handle, file->ucontext);
 	scq = idr_read_cq(cmd.send_cq_handle, file->ucontext, 0);
 	rcq = cmd.recv_cq_handle == cmd.send_cq_handle ?
 		scq : idr_read_cq(cmd.recv_cq_handle, file->ucontext, 1);
 
-	if (!pd || !scq || !rcq || (cmd.is_srq && !srq)) {
+	if (!pd || !scq || !rcq || (cmd.is_srq && !srq) ||
+	    (cmd.qp_type == IB_QPT_XRC && !xrcd)) {
 		ret = -EINVAL;
 		goto err_put;
 	}
@@ -1063,6 +1081,7 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
 	attr.srq           = srq;
 	attr.sq_sig_type   = cmd.sq_sig_all ? IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR;
 	attr.qp_type       = cmd.qp_type;
+	attr.xrcd          = xrcd;
 	attr.create_flags  = 0;
 
 	attr.cap.max_send_wr     = cmd.max_send_wr;
@@ -1090,11 +1109,14 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
 	qp->event_handler = attr.event_handler;
 	qp->qp_context    = attr.qp_context;
 	qp->qp_type	  = attr.qp_type;
+	qp->xrcd	  = attr.xrcd;
 	atomic_inc(&pd->usecnt);
 	atomic_inc(&attr.send_cq->usecnt);
 	atomic_inc(&attr.recv_cq->usecnt);
 	if (attr.srq)
 		atomic_inc(&attr.srq->usecnt);
+	else if (attr.xrcd)
+		atomic_inc(&attr.xrcd->usecnt);
 
 	obj->uevent.uobject.object = qp;
 	ret = idr_add_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject);
@@ -1122,6 +1144,8 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
 		put_cq_read(rcq);
 	if (srq)
 		put_srq_read(srq);
+	if (xrcd)
+		put_xrcd_read(xrcd);
 
 	mutex_lock(&file->mutex);
 	list_add_tail(&obj->uevent.uobject.list, &file->ucontext->qp_list);
@@ -1148,6 +1172,8 @@ err_put:
 		put_cq_read(rcq);
 	if (srq)
 		put_srq_read(srq);
+	if (xrcd)
+		put_xrcd_read(xrcd);
 
 	put_uobj_write(&obj->uevent.uobject);
 	return ret;
@@ -2000,6 +2026,8 @@ ssize_t ib_uverbs_create_srq(struct ib_uverbs_file *file,
 	srq->uobject       = &obj->uobject;
 	srq->event_handler = attr.event_handler;
 	srq->srq_context   = attr.srq_context;
+	srq->xrc_cq        = NULL;
+	srq->xrcd          = NULL;
 	atomic_inc(&pd->usecnt);
 	atomic_set(&srq->usecnt, 0);
 
@@ -2045,6 +2073,134 @@ err:
 	return ret;
 }
 
+ssize_t ib_uverbs_create_xrc_srq(struct ib_uverbs_file *file,
+				 const char __user *buf, int in_len,
+				 int out_len)
+{
+	struct ib_uverbs_create_xrc_srq  cmd;
+	struct ib_uverbs_create_srq_resp resp;
+	struct ib_udata			 udata;
+	struct ib_uevent_object		*obj;
+	struct ib_pd			*pd;
+	struct ib_srq			*srq;
+	struct ib_cq			*xrc_cq;
+	struct ib_xrcd			*xrcd;
+	struct ib_srq_init_attr		 attr;
+	int ret;
+
+	if (out_len < sizeof resp)
+		return -ENOSPC;
+
+	if (copy_from_user(&cmd, buf, sizeof cmd))
+		return -EFAULT;
+
+	INIT_UDATA(&udata, buf + sizeof cmd,
+		   (unsigned long) cmd.response + sizeof resp,
+		   in_len - sizeof cmd, out_len - sizeof resp);
+
+	obj = kmalloc(sizeof *obj, GFP_KERNEL);
+	if (!obj)
+		return -ENOMEM;
+
+	init_uobj(&obj->uobject, cmd.user_handle, file->ucontext, &srq_lock_key);
+	down_write(&obj->uobject.mutex);
+
+	pd  = idr_read_pd(cmd.pd_handle, file->ucontext);
+	if (!pd) {
+		ret = -EINVAL;
+		goto err;
+	}
+
+	xrc_cq  = idr_read_cq(cmd.xrc_cq, file->ucontext, 0);
+	if (!xrc_cq) {
+		ret = -EINVAL;
+		goto err_put_pd;
+	}
+
+	xrcd  = idr_read_xrcd(cmd.xrcd_handle, file->ucontext);
+	if (!xrcd) {
+		ret = -EINVAL;
+		goto err_put_cq;
+	}
+
+	attr.event_handler  = ib_uverbs_srq_event_handler;
+	attr.srq_context    = file;
+	attr.attr.max_wr    = cmd.max_wr;
+	attr.attr.max_sge   = cmd.max_sge;
+	attr.attr.srq_limit = cmd.srq_limit;
+
+	obj->events_reported     = 0;
+	INIT_LIST_HEAD(&obj->event_list);
+
+	srq = pd->device->create_xrc_srq(pd, xrc_cq, xrcd, &attr, &udata);
+	if (IS_ERR(srq)) {
+		ret = PTR_ERR(srq);
+		goto err_put;
+	}
+
+	srq->device	   = pd->device;
+	srq->pd		   = pd;
+	srq->uobject	   = &obj->uobject;
+	srq->event_handler = attr.event_handler;
+	srq->srq_context   = attr.srq_context;
+	srq->xrc_cq	   = xrc_cq;
+	srq->xrcd	   = xrcd;
+	atomic_inc(&pd->usecnt);
+	atomic_inc(&xrc_cq->usecnt);
+	atomic_inc(&xrcd->usecnt);
+
+	atomic_set(&srq->usecnt, 0);
+
+	obj->uobject.object = srq;
+	ret = idr_add_uobj(&ib_uverbs_srq_idr, &obj->uobject);
+	if (ret)
+		goto err_destroy;
+
+	memset(&resp, 0, sizeof resp);
+	resp.srq_handle	= obj->uobject.id;
+	resp.max_wr	= attr.attr.max_wr;
+	resp.max_sge	= attr.attr.max_sge;
+
+	if (copy_to_user((void __user *) (unsigned long) cmd.response,
+			 &resp, sizeof resp)) {
+		ret = -EFAULT;
+		goto err_copy;
+	}
+
+	put_xrcd_read(xrcd);
+	put_cq_read(xrc_cq);
+	put_pd_read(pd);
+
+	mutex_lock(&file->mutex);
+	list_add_tail(&obj->uobject.list, &file->ucontext->srq_list);
+	mutex_unlock(&file->mutex);
+
+	obj->uobject.live = 1;
+
+	up_write(&obj->uobject.mutex);
+
+	return in_len;
+
+err_copy:
+	idr_remove_uobj(&ib_uverbs_srq_idr, &obj->uobject);
+
+err_destroy:
+	ib_destroy_srq(srq);
+
+err_put:
+	put_xrcd_read(xrcd);
+
+err_put_cq:
+	put_cq_read(xrc_cq);
+
+err_put_pd:
+	put_pd_read(pd);
+
+err:
+	put_uobj_write(&obj->uobject);
+	return ret;
+}
+
 ssize_t ib_uverbs_modify_srq(struct ib_uverbs_file *file,
 			     const char __user *buf, int in_len,
 			     int out_len)
@@ -2163,3 +2319,117 @@ ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file,
 
 	return ret ? ret : in_len;
 }
+
+ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file,
+			    const char __user *buf, int in_len,
+			    int out_len)
+{
+	struct ib_uverbs_open_xrcd	cmd;
+	struct ib_uverbs_open_xrcd_resp	resp;
+	struct ib_udata			udata;
+	struct ib_uobject	       *uobj;
+	struct ib_xrcd		       *xrcd;
+	int ret;
+
+	if (out_len < sizeof resp)
+		return -ENOSPC;
+
+	if (copy_from_user(&cmd, buf, sizeof cmd))
+		return -EFAULT;
+
+	/* file descriptors/inodes not yet implemented */
+	if (cmd.fd != -1)
+		return -ENOSYS;
+
+	INIT_UDATA(&udata, buf + sizeof cmd,
+		   (unsigned long) cmd.response + sizeof resp,
+		   in_len - sizeof cmd, out_len - sizeof resp);
+
+	uobj = kmalloc(sizeof *uobj, GFP_KERNEL);
+	if (!uobj)
+		return -ENOMEM;
+
+	init_uobj(uobj, 0, file->ucontext, &xrcd_lock_key);
+	down_write(&uobj->mutex);
+
+	xrcd = file->device->ib_dev->alloc_xrcd(file->device->ib_dev,
+						file->ucontext, &udata);
+	if (IS_ERR(xrcd)) {
+		ret = PTR_ERR(xrcd);
+		goto err;
+	}
+
+	xrcd->uobject = uobj;
+	xrcd->device  = file->device->ib_dev;
+	atomic_set(&xrcd->usecnt, 0);
+
+	uobj->object = xrcd;
+	ret = idr_add_uobj(&ib_uverbs_xrcd_idr, uobj);
+	if (ret)
+		goto err_idr;
+
+	memset(&resp, 0, sizeof resp);
+	resp.xrcd_handle = uobj->id;
+
+	if (copy_to_user((void __user *) (unsigned long) cmd.response,
+			 &resp, sizeof resp)) {
+		ret = -EFAULT;
+		goto err_copy;
+	}
+
+	mutex_lock(&file->mutex);
+	list_add_tail(&uobj->list, &file->ucontext->xrcd_list);
+	mutex_unlock(&file->mutex);
+
+	uobj->live = 1;
+
+	up_write(&uobj->mutex);
+
+	return in_len;
+
+err_copy:
+	idr_remove_uobj(&ib_uverbs_xrcd_idr, uobj);
+
+err_idr:
+	ib_dealloc_xrcd(xrcd);
+
+err:
+	put_uobj_write(uobj);
+	return ret;
+}
+
+ssize_t ib_uverbs_close_xrcd(struct ib_uverbs_file *file,
+				   const char __user *buf, int in_len,
+				   int out_len)
+{
+	struct ib_uverbs_close_xrcd	cmd;
+	struct ib_uobject	       *uobj;
+	int				ret;
+
+	if (copy_from_user(&cmd, buf, sizeof cmd))
+		return -EFAULT;
+
+	uobj = idr_write_uobj(&ib_uverbs_xrcd_idr, cmd.xrcd_handle,
+			      file->ucontext);
+	if (!uobj)
+		return -EINVAL;
+
+	ret = ib_dealloc_xrcd(uobj->object);
+	if (!ret)
+		uobj->live = 0;
+
+	put_uobj_write(uobj);
+
+	if (ret)
+		return ret;
+
+	idr_remove_uobj(&ib_uverbs_xrcd_idr, uobj);
+
+	mutex_lock(&file->mutex);
+	list_del(&uobj->list);
+	mutex_unlock(&file->mutex);
+
+	put_uobj(uobj);
+
+	return in_len;
+}
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index 5f284ff..09b5b58 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -73,6 +73,7 @@ DEFINE_IDR(ib_uverbs_ah_idr);
 DEFINE_IDR(ib_uverbs_cq_idr);
 DEFINE_IDR(ib_uverbs_qp_idr);
 DEFINE_IDR(ib_uverbs_srq_idr);
+DEFINE_IDR(ib_uverbs_xrcd_idr);
 
 static DEFINE_SPINLOCK(map_lock);
 static struct ib_uverbs_device *dev_table[IB_UVERBS_MAX_DEVICES];
@@ -109,6 +110,9 @@ static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file,
 	[IB_USER_VERBS_CMD_MODIFY_SRQ]    	= ib_uverbs_modify_srq,
 	[IB_USER_VERBS_CMD_QUERY_SRQ]     	= ib_uverbs_query_srq,
 	[IB_USER_VERBS_CMD_DESTROY_SRQ]   	= ib_uverbs_destroy_srq,
+	[IB_USER_VERBS_CMD_CREATE_XRC_SRQ]	= ib_uverbs_create_xrc_srq,
+	[IB_USER_VERBS_CMD_OPEN_XRCD]		= ib_uverbs_open_xrcd,
+	[IB_USER_VERBS_CMD_CLOSE_XRCD]		= ib_uverbs_close_xrcd,
 };
 
 static struct vfsmount *uverbs_event_mnt;
@@ -212,17 +216,6 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
 		kfree(uqp);
 	}
 
-	list_for_each_entry_safe(uobj, tmp, &context->cq_list, list) {
-		struct ib_cq *cq = uobj->object;
-		struct ib_uverbs_event_file *ev_file = cq->cq_context;
-		struct ib_ucq_object *ucq =
-			container_of(uobj, struct ib_ucq_object, uobject);
-
-		idr_remove_uobj(&ib_uverbs_cq_idr, uobj);
-		ib_destroy_cq(cq);
-		ib_uverbs_release_ucq(file, ev_file, ucq);
-		kfree(ucq);
-	}
 
 	list_for_each_entry_safe(uobj, tmp, &context->srq_list, list) {
 		struct ib_srq *srq = uobj->object;
@@ -235,6 +228,18 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
 		kfree(uevent);
 	}
 
+	list_for_each_entry_safe(uobj, tmp, &context->cq_list, list) {
+		struct ib_cq *cq = uobj->object;
+		struct ib_uverbs_event_file *ev_file = cq->cq_context;
+		struct ib_ucq_object *ucq =
+			container_of(uobj, struct ib_ucq_object, uobject);
+
+		idr_remove_uobj(&ib_uverbs_cq_idr, uobj);
+		ib_destroy_cq(cq);
+		ib_uverbs_release_ucq(file, ev_file, ucq);
+		kfree(ucq);
+	}
+
 	/* XXX Free MWs */
 
 	list_for_each_entry_safe(uobj, tmp, &context->mr_list, list) {
@@ -245,6 +250,14 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
 		kfree(uobj);
 	}
 
+	list_for_each_entry_safe(uobj, tmp, &context->xrcd_list, list) {
+		struct ib_xrcd *xrcd = uobj->object;
+
+		idr_remove_uobj(&ib_uverbs_xrcd_idr, uobj);
+		ib_dealloc_xrcd(xrcd);
+		kfree(uobj);
+	}
+
 	list_for_each_entry_safe(uobj, tmp, &context->pd_list, list) {
 		struct ib_pd *pd = uobj->object;
 
diff --git a/include/rdma/ib_user_verbs.h b/include/rdma/ib_user_verbs.h
index a17f771..c9e540c 100644
--- a/include/rdma/ib_user_verbs.h
+++ b/include/rdma/ib_user_verbs.h
@@ -81,7 +81,10 @@ enum {
 	IB_USER_VERBS_CMD_MODIFY_SRQ,
 	IB_USER_VERBS_CMD_QUERY_SRQ,
 	IB_USER_VERBS_CMD_DESTROY_SRQ,
-	IB_USER_VERBS_CMD_POST_SRQ_RECV
+	IB_USER_VERBS_CMD_POST_SRQ_RECV,
+	IB_USER_VERBS_CMD_CREATE_XRC_SRQ,
+	IB_USER_VERBS_CMD_OPEN_XRCD,
+	IB_USER_VERBS_CMD_CLOSE_XRCD
 };
 
 /*
@@ -647,6 +650,18 @@ struct ib_uverbs_create_srq {
 	__u64 driver_data[0];
 };
 
+struct ib_uverbs_create_xrc_srq {
+	__u64 response;
+	__u64 user_handle;
+	__u32 pd_handle;
+	__u32 max_wr;
+	__u32 max_sge;
+	__u32 srq_limit;
+	__u32 xrcd_handle;
+	__u32 xrc_cq;
+	__u64 driver_data[0];
+};
+
 struct ib_uverbs_create_srq_resp {
 	__u32 srq_handle;
 	__u32 max_wr;
@@ -686,4 +701,22 @@ struct ib_uverbs_destroy_srq_resp {
 	__u32 events_reported;
 };
 
+struct ib_uverbs_open_xrcd {
+	__u64 response;
+	__s32 fd;
+	__u32 oflags;
+	__u64 driver_data[0];
+};
+
+struct ib_uverbs_open_xrcd_resp {
+	__u32 xrcd_handle;
+};
+
+struct ib_uverbs_close_xrcd {
+	__u64 response;
+	__u32 xrcd_handle;
+	__u32 reserved;
+	__u64 driver_data[0];
+};
+
 #endif /* IB_USER_VERBS_H */
-- 
1.6.6.1

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH 3/4] IB/uverbs: Add struct ib_usrq_object and ib_uxrcd_object
       [not found] ` <1264446094-4460-1-git-send-email-rolandd-FYB4Gu1CFyUAvxtiuMwx3w@public.gmane.org>
  2010-01-25 19:01   ` [PATCH 1/4] IB/core: XRC base implementation Roland Dreier
  2010-01-25 19:01   ` [PATCH 2/4] IB/uverbs: Support for XRC Roland Dreier
@ 2010-01-25 19:01   ` Roland Dreier
  2010-01-25 19:01   ` [PATCH 4/4] IB/uverbs: Support for associating XRC domains to inodes Roland Dreier
  2010-01-26 11:16   ` [PATCH 0/4] Current XRC queue Tziporet Koren
  4 siblings, 0 replies; 15+ messages in thread
From: Roland Dreier @ 2010-01-25 19:01 UTC (permalink / raw)
  To: linux-rdma-u79uwXL29TY76Z2rM5mHXA

When we add support for sharing XRC domains among multiple processes,
we will need to keep track of which XRCD each QP and SRQ is associated
with, and keep a per-userspace-context reference count for each XRCD.

Userspace QPs already have a struct ib_uqp_object associated with
them, so we can put an XRCD pointer there, but we don't have such an
object for SRQs.  To handle this, convert userspace SRQs from using
struct ib_uevent_object to a new struct ib_usrq_object.  To hold the
per-context XRCD reference count, we convert userspace XRCDs to use a
new struct ib_uxrcd_object.

This patch makes struct ib_usrq_object just contain a struct
ib_uevent_object and struct ib_uxrcd_object just contain a struct
ib_uobject.  The follow-up patch adding support for shared XRCDs will
add the extra fields.

Signed-off-by: Roland Dreier <rolandd-FYB4Gu1CFyUAvxtiuMwx3w@public.gmane.org>
---
 drivers/infiniband/core/uverbs.h      |    8 +++
 drivers/infiniband/core/uverbs_cmd.c  |   92 ++++++++++++++++----------------
 drivers/infiniband/core/uverbs_main.c |   12 +++--
 3 files changed, 61 insertions(+), 51 deletions(-)

diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h
index f9c051e..9180acd 100644
--- a/drivers/infiniband/core/uverbs.h
+++ b/drivers/infiniband/core/uverbs.h
@@ -119,6 +119,14 @@ struct ib_uevent_object {
 	u32			events_reported;
 };
 
+struct ib_uxrcd_object {
+	struct ib_uobject	uobject;
+};
+
+struct ib_usrq_object {
+	struct ib_uevent_object	uevent;
+};
+
 struct ib_uqp_object {
 	struct ib_uevent_object	uevent;
 	struct list_head 	mcast_list;
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 92f9f11..b209339 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -1977,7 +1977,7 @@ ssize_t ib_uverbs_create_srq(struct ib_uverbs_file *file,
 	struct ib_uverbs_create_srq      cmd;
 	struct ib_uverbs_create_srq_resp resp;
 	struct ib_udata                  udata;
-	struct ib_uevent_object         *obj;
+	struct ib_usrq_object		*obj;
 	struct ib_pd                    *pd;
 	struct ib_srq                   *srq;
 	struct ib_srq_init_attr          attr;
@@ -1997,8 +1997,8 @@ ssize_t ib_uverbs_create_srq(struct ib_uverbs_file *file,
 	if (!obj)
 		return -ENOMEM;
 
-	init_uobj(&obj->uobject, cmd.user_handle, file->ucontext, &srq_lock_key);
-	down_write(&obj->uobject.mutex);
+	init_uobj(&obj->uevent.uobject, cmd.user_handle, file->ucontext, &srq_lock_key);
+	down_write(&obj->uevent.uobject.mutex);
 
 	pd  = idr_read_pd(cmd.pd_handle, file->ucontext);
 	if (!pd) {
@@ -2012,8 +2012,8 @@ ssize_t ib_uverbs_create_srq(struct ib_uverbs_file *file,
 	attr.attr.max_sge   = cmd.max_sge;
 	attr.attr.srq_limit = cmd.srq_limit;
 
-	obj->events_reported     = 0;
-	INIT_LIST_HEAD(&obj->event_list);
+	obj->uevent.events_reported = 0;
+	INIT_LIST_HEAD(&obj->uevent.event_list);
 
 	srq = pd->device->create_srq(pd, &attr, &udata);
 	if (IS_ERR(srq)) {
@@ -2023,7 +2023,7 @@ ssize_t ib_uverbs_create_srq(struct ib_uverbs_file *file,
 
 	srq->device    	   = pd->device;
 	srq->pd        	   = pd;
-	srq->uobject       = &obj->uobject;
+	srq->uobject       = &obj->uevent.uobject;
 	srq->event_handler = attr.event_handler;
 	srq->srq_context   = attr.srq_context;
 	srq->xrc_cq        = NULL;
@@ -2031,13 +2031,13 @@ ssize_t ib_uverbs_create_srq(struct ib_uverbs_file *file,
 	atomic_inc(&pd->usecnt);
 	atomic_set(&srq->usecnt, 0);
 
-	obj->uobject.object = srq;
-	ret = idr_add_uobj(&ib_uverbs_srq_idr, &obj->uobject);
+	obj->uevent.uobject.object = srq;
+	ret = idr_add_uobj(&ib_uverbs_srq_idr, &obj->uevent.uobject);
 	if (ret)
 		goto err_destroy;
 
 	memset(&resp, 0, sizeof resp);
-	resp.srq_handle = obj->uobject.id;
+	resp.srq_handle = obj->uevent.uobject.id;
 	resp.max_wr     = attr.attr.max_wr;
 	resp.max_sge    = attr.attr.max_sge;
 
@@ -2050,17 +2050,17 @@ ssize_t ib_uverbs_create_srq(struct ib_uverbs_file *file,
 	put_pd_read(pd);
 
 	mutex_lock(&file->mutex);
-	list_add_tail(&obj->uobject.list, &file->ucontext->srq_list);
+	list_add_tail(&obj->uevent.uobject.list, &file->ucontext->srq_list);
 	mutex_unlock(&file->mutex);
 
-	obj->uobject.live = 1;
+	obj->uevent.uobject.live = 1;
 
-	up_write(&obj->uobject.mutex);
+	up_write(&obj->uevent.uobject.mutex);
 
 	return in_len;
 
 err_copy:
-	idr_remove_uobj(&ib_uverbs_srq_idr, &obj->uobject);
+	idr_remove_uobj(&ib_uverbs_srq_idr, &obj->uevent.uobject);
 
 err_destroy:
 	ib_destroy_srq(srq);
@@ -2069,7 +2069,7 @@ err_put:
 	put_pd_read(pd);
 
 err:
-	put_uobj_write(&obj->uobject);
+	put_uobj_write(&obj->uevent.uobject);
 	return ret;
 }
 
@@ -2080,7 +2080,7 @@ ssize_t ib_uverbs_create_xrc_srq(struct ib_uverbs_file *file,
 	struct ib_uverbs_create_xrc_srq  cmd;
 	struct ib_uverbs_create_srq_resp resp;
 	struct ib_udata			 udata;
-	struct ib_uevent_object		*obj;
+	struct ib_usrq_object		*obj;
 	struct ib_pd			*pd;
 	struct ib_srq			*srq;
 	struct ib_cq			*xrc_cq;
@@ -2102,8 +2102,8 @@ ssize_t ib_uverbs_create_xrc_srq(struct ib_uverbs_file *file,
 	if (!obj)
 		return -ENOMEM;
 
-	init_uobj(&obj->uobject, cmd.user_handle, file->ucontext, &srq_lock_key);
-	down_write(&obj->uobject.mutex);
+	init_uobj(&obj->uevent.uobject, cmd.user_handle, file->ucontext, &srq_lock_key);
+	down_write(&obj->uevent.uobject.mutex);
 
 	pd  = idr_read_pd(cmd.pd_handle, file->ucontext);
 	if (!pd) {
@@ -2129,8 +2129,8 @@ ssize_t ib_uverbs_create_xrc_srq(struct ib_uverbs_file *file,
 	attr.attr.max_sge   = cmd.max_sge;
 	attr.attr.srq_limit = cmd.srq_limit;
 
-	obj->events_reported     = 0;
-	INIT_LIST_HEAD(&obj->event_list);
+	obj->uevent.events_reported = 0;
+	INIT_LIST_HEAD(&obj->uevent.event_list);
 
 	srq = pd->device->create_xrc_srq(pd, xrc_cq, xrcd, &attr, &udata);
 	if (IS_ERR(srq)) {
@@ -2140,7 +2140,7 @@ ssize_t ib_uverbs_create_xrc_srq(struct ib_uverbs_file *file,
 
 	srq->device	   = pd->device;
 	srq->pd		   = pd;
-	srq->uobject	   = &obj->uobject;
+	srq->uobject	   = &obj->uevent.uobject;
 	srq->event_handler = attr.event_handler;
 	srq->srq_context   = attr.srq_context;
 	srq->xrc_cq	   = xrc_cq;
@@ -2151,13 +2151,13 @@ ssize_t ib_uverbs_create_xrc_srq(struct ib_uverbs_file *file,
 
 	atomic_set(&srq->usecnt, 0);
 
-	obj->uobject.object = srq;
-	ret = idr_add_uobj(&ib_uverbs_srq_idr, &obj->uobject);
+	obj->uevent.uobject.object = srq;
+	ret = idr_add_uobj(&ib_uverbs_srq_idr, &obj->uevent.uobject);
 	if (ret)
 		goto err_destroy;
 
 	memset(&resp, 0, sizeof resp);
-	resp.srq_handle	= obj->uobject.id;
+	resp.srq_handle	= obj->uevent.uobject.id;
 	resp.max_wr	= attr.attr.max_wr;
 	resp.max_sge	= attr.attr.max_sge;
 
@@ -2172,17 +2172,17 @@ ssize_t ib_uverbs_create_xrc_srq(struct ib_uverbs_file *file,
 	put_pd_read(pd);
 
 	mutex_lock(&file->mutex);
-	list_add_tail(&obj->uobject.list, &file->ucontext->srq_list);
+	list_add_tail(&obj->uevent.uobject.list, &file->ucontext->srq_list);
 	mutex_unlock(&file->mutex);
 
-	obj->uobject.live = 1;
+	obj->uevent.uobject.live = 1;
 
-	up_write(&obj->uobject.mutex);
+	up_write(&obj->uevent.uobject.mutex);
 
 	return in_len;
 
 err_copy:
-	idr_remove_uobj(&ib_uverbs_srq_idr, &obj->uobject);
+	idr_remove_uobj(&ib_uverbs_srq_idr, &obj->uevent.uobject);
 
 err_destroy:
 	ib_destroy_srq(srq);
@@ -2197,7 +2197,7 @@ err_put_pd:
 	put_pd_read(pd);
 
 err:
-	put_uobj_write(&obj->uobject);
+	put_uobj_write(&obj->uevent.uobject);
 	return ret;
 }
 
@@ -2279,7 +2279,7 @@ ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file,
 	struct ib_uverbs_destroy_srq_resp resp;
 	struct ib_uobject		 *uobj;
 	struct ib_srq               	 *srq;
-	struct ib_uevent_object        	 *obj;
+	struct ib_usrq_object		 *obj;
 	int                         	  ret = -EINVAL;
 
 	if (copy_from_user(&cmd, buf, sizeof cmd))
@@ -2289,7 +2289,7 @@ ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file,
 	if (!uobj)
 		return -EINVAL;
 	srq = uobj->object;
-	obj = container_of(uobj, struct ib_uevent_object, uobject);
+	obj = container_of(uobj, struct ib_usrq_object, uevent.uobject);
 
 	ret = ib_destroy_srq(srq);
 	if (!ret)
@@ -2306,10 +2306,10 @@ ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file,
 	list_del(&uobj->list);
 	mutex_unlock(&file->mutex);
 
-	ib_uverbs_release_uevent(file, obj);
+	ib_uverbs_release_uevent(file, &obj->uevent);
 
 	memset(&resp, 0, sizeof resp);
-	resp.events_reported = obj->events_reported;
+	resp.events_reported = obj->uevent.events_reported;
 
 	put_uobj(uobj);
 
@@ -2327,7 +2327,7 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file,
 	struct ib_uverbs_open_xrcd	cmd;
 	struct ib_uverbs_open_xrcd_resp	resp;
 	struct ib_udata			udata;
-	struct ib_uobject	       *uobj;
+	struct ib_uxrcd_object	       *obj;
 	struct ib_xrcd		       *xrcd;
 	int ret;
 
@@ -2345,12 +2345,12 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file,
 		   (unsigned long) cmd.response + sizeof resp,
 		   in_len - sizeof cmd, out_len - sizeof resp);
 
-	uobj = kmalloc(sizeof *uobj, GFP_KERNEL);
-	if (!uobj)
+	obj = kmalloc(sizeof *obj, GFP_KERNEL);
+	if (!obj)
 		return -ENOMEM;
 
-	init_uobj(uobj, 0, file->ucontext, &xrcd_lock_key);
-	down_write(&uobj->mutex);
+	init_uobj(&obj->uobject, 0, file->ucontext, &xrcd_lock_key);
+	down_write(&obj->uobject.mutex);
 
 	xrcd = file->device->ib_dev->alloc_xrcd(file->device->ib_dev,
 						file->ucontext, &udata);
@@ -2359,17 +2359,17 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file,
 		goto err;
 	}
 
-	xrcd->uobject = uobj;
+	xrcd->uobject = &obj->uobject;
 	xrcd->device  = file->device->ib_dev;
 	atomic_set(&xrcd->usecnt, 0);
 
-	uobj->object = xrcd;
-	ret = idr_add_uobj(&ib_uverbs_xrcd_idr, uobj);
+	obj->uobject.object = xrcd;
+	ret = idr_add_uobj(&ib_uverbs_xrcd_idr, &obj->uobject);
 	if (ret)
 		goto err_idr;
 
 	memset(&resp, 0, sizeof resp);
-	resp.xrcd_handle = uobj->id;
+	resp.xrcd_handle = obj->uobject.id;
 
 	if (copy_to_user((void __user *) (unsigned long) cmd.response,
 			 &resp, sizeof resp)) {
@@ -2378,23 +2378,23 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file,
 	}
 
 	mutex_lock(&file->mutex);
-	list_add_tail(&uobj->list, &file->ucontext->xrcd_list);
+	list_add_tail(&obj->uobject.list, &file->ucontext->xrcd_list);
 	mutex_unlock(&file->mutex);
 
-	uobj->live = 1;
+	obj->uobject.live = 1;
 
-	up_write(&uobj->mutex);
+	up_write(&obj->uobject.mutex);
 
 	return in_len;
 
 err_copy:
-	idr_remove_uobj(&ib_uverbs_xrcd_idr, uobj);
+	idr_remove_uobj(&ib_uverbs_xrcd_idr, &obj->uobject);
 
 err_idr:
 	ib_dealloc_xrcd(xrcd);
 
 err:
-	put_uobj_write(uobj);
+	put_uobj_write(&obj->uobject);
 	return ret;
 }
 
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index 09b5b58..2a97810 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -219,13 +219,13 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
 
 	list_for_each_entry_safe(uobj, tmp, &context->srq_list, list) {
 		struct ib_srq *srq = uobj->object;
-		struct ib_uevent_object *uevent =
-			container_of(uobj, struct ib_uevent_object, uobject);
+		struct ib_usrq_object *usrq =
+			container_of(uobj, struct ib_usrq_object, uevent.uobject);
 
 		idr_remove_uobj(&ib_uverbs_srq_idr, uobj);
 		ib_destroy_srq(srq);
-		ib_uverbs_release_uevent(file, uevent);
-		kfree(uevent);
+		ib_uverbs_release_uevent(file, &usrq->uevent);
+		kfree(usrq);
 	}
 
 	list_for_each_entry_safe(uobj, tmp, &context->cq_list, list) {
@@ -252,10 +252,12 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
 
 	list_for_each_entry_safe(uobj, tmp, &context->xrcd_list, list) {
 		struct ib_xrcd *xrcd = uobj->object;
+		struct ib_uxrcd_object *uxrcd =
+			container_of(uobj, struct ib_uxrcd_object, uobject);
 
 		idr_remove_uobj(&ib_uverbs_xrcd_idr, uobj);
 		ib_dealloc_xrcd(xrcd);
-		kfree(uobj);
+		kfree(uxrcd);
 	}
 
 	list_for_each_entry_safe(uobj, tmp, &context->pd_list, list) {
-- 
1.6.6.1

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH 4/4] IB/uverbs: Support for associating XRC domains to inodes
       [not found] ` <1264446094-4460-1-git-send-email-rolandd-FYB4Gu1CFyUAvxtiuMwx3w@public.gmane.org>
                     ` (2 preceding siblings ...)
  2010-01-25 19:01   ` [PATCH 3/4] IB/uverbs: Add struct ib_usrq_object and ib_uxrcd_object Roland Dreier
@ 2010-01-25 19:01   ` Roland Dreier
       [not found]     ` <1264446094-4460-5-git-send-email-rolandd-FYB4Gu1CFyUAvxtiuMwx3w@public.gmane.org>
  2010-01-26 11:16   ` [PATCH 0/4] Current XRC queue Tziporet Koren
  4 siblings, 1 reply; 15+ messages in thread
From: Roland Dreier @ 2010-01-25 19:01 UTC (permalink / raw)
  To: linux-rdma-u79uwXL29TY76Z2rM5mHXA; +Cc: Jack Morgenstein

From: Jack Morgenstein <jackm-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>

Add support for associating an XRC domain to an inode so that XRC
domains can be shared between processes.  We keep a per-device RB tree
of XRCDs (indexed by inode) in the uverbs module, and use struct
ib_xrcd's usecnt member to reference count XRCDs so that an XRCD is
not freed until the last process with a reference is done with it.

Signed-off-by: Jack Morgenstein <jackm-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>
Signed-off-by: Roland Dreier <rolandd-FYB4Gu1CFyUAvxtiuMwx3w@public.gmane.org>
---
 drivers/infiniband/core/uverbs.h      |    9 +-
 drivers/infiniband/core/uverbs_cmd.c  |  298 ++++++++++++++++++++++++++++-----
 drivers/infiniband/core/uverbs_main.c |    6 +-
 drivers/infiniband/core/verbs.c       |    2 +-
 include/rdma/ib_verbs.h               |    2 +-
 5 files changed, 272 insertions(+), 45 deletions(-)

diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h
index 9180acd..e873437 100644
--- a/drivers/infiniband/core/uverbs.h
+++ b/drivers/infiniband/core/uverbs.h
@@ -70,10 +70,12 @@
 struct ib_uverbs_device {
 	struct kref				ref;
 	struct completion			comp;
-	int					devnum;
 	struct cdev			       *cdev;
 	struct device			       *dev;
 	struct ib_device		       *ib_dev;
+	struct rb_root				xrcd_tree;
+	struct mutex				xrcd_tree_mutex;
+	int					devnum;
 	int					num_comp_vectors;
 };
 
@@ -121,15 +123,18 @@ struct ib_uevent_object {
 
 struct ib_uxrcd_object {
 	struct ib_uobject	uobject;
+	atomic_t		refcnt;
 };
 
 struct ib_usrq_object {
 	struct ib_uevent_object	uevent;
+	struct ib_uxrcd_object *uxrcd;
 };
 
 struct ib_uqp_object {
 	struct ib_uevent_object	uevent;
 	struct list_head 	mcast_list;
+	struct ib_uxrcd_object *uxrcd;
 };
 
 struct ib_ucq_object {
@@ -169,6 +174,8 @@ void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr);
 void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr);
 void ib_uverbs_event_handler(struct ib_event_handler *handler,
 			     struct ib_event *event);
+void ib_uverbs_dealloc_xrcd(struct ib_uverbs_device *dev,
+			    struct ib_xrcd *xrcd);
 
 #define IB_UVERBS_DECLARE_CMD(name)					\
 	ssize_t ib_uverbs_##name(struct ib_uverbs_file *file,		\
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index b209339..3db78cb 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -256,14 +256,11 @@ static void put_srq_read(struct ib_srq *srq)
 }
 
 static struct ib_xrcd *idr_read_xrcd(int xrcd_handle,
-				     struct ib_ucontext *context)
+				     struct ib_ucontext *context,
+				     struct ib_uobject **uobj)
 {
-	return idr_read_obj(&ib_uverbs_xrcd_idr, xrcd_handle, context, 0);
-}
-
-static void put_xrcd_read(struct ib_xrcd *xrcd)
-{
-	put_uobj_read(xrcd->uobject);
+	*uobj = idr_read_uobj(&ib_uverbs_xrcd_idr, xrcd_handle, context, 0);
+	return *uobj ? (*uobj)->object : NULL;
 }
 
 ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
@@ -1040,6 +1037,7 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
 	struct ib_qp                   *qp;
 	struct ib_qp_init_attr          attr;
 	struct ib_xrcd		       *xrcd;
+	struct ib_uobject	       *uninitialized_var(xrcd_uobj);
 	int ret;
 
 	if (out_len < sizeof resp)
@@ -1062,7 +1060,7 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
 	srq = (cmd.is_srq && cmd.qp_type != IB_QPT_XRC) ?
 		idr_read_srq(cmd.srq_handle, file->ucontext) : NULL;
 	xrcd = cmd.qp_type == IB_QPT_XRC ?
-		idr_read_xrcd(cmd.srq_handle, file->ucontext) : NULL;
+		idr_read_xrcd(cmd.srq_handle, file->ucontext, &xrcd_uobj) : NULL;
 	pd  = idr_read_pd(cmd.pd_handle, file->ucontext);
 	scq = idr_read_cq(cmd.send_cq_handle, file->ucontext, 0);
 	rcq = cmd.recv_cq_handle == cmd.send_cq_handle ?
@@ -1091,13 +1089,18 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
 	attr.cap.max_inline_data = cmd.max_inline_data;
 
 	obj->uevent.events_reported     = 0;
+	if (xrcd) {
+		obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object, uobject);
+		atomic_inc(&obj->uxrcd->refcnt);
+	} else
+		obj->uxrcd = NULL;
 	INIT_LIST_HEAD(&obj->uevent.event_list);
 	INIT_LIST_HEAD(&obj->mcast_list);
 
 	qp = pd->device->create_qp(pd, &attr, &udata);
 	if (IS_ERR(qp)) {
 		ret = PTR_ERR(qp);
-		goto err_put;
+		goto err_xrcd_ref;
 	}
 
 	qp->device     	  = pd->device;
@@ -1145,7 +1148,7 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
 	if (srq)
 		put_srq_read(srq);
 	if (xrcd)
-		put_xrcd_read(xrcd);
+		put_uobj_read(xrcd_uobj);
 
 	mutex_lock(&file->mutex);
 	list_add_tail(&obj->uevent.uobject.list, &file->ucontext->qp_list);
@@ -1163,6 +1166,10 @@ err_copy:
 err_destroy:
 	ib_destroy_qp(qp);
 
+err_xrcd_ref:
+	if (xrcd)
+		atomic_dec(&obj->uxrcd->refcnt);
+
 err_put:
 	if (pd)
 		put_pd_read(pd);
@@ -1173,7 +1180,7 @@ err_put:
 	if (srq)
 		put_srq_read(srq);
 	if (xrcd)
-		put_xrcd_read(xrcd);
+		atomic_dec(&obj->uxrcd->refcnt);
 
 	put_uobj_write(&obj->uevent.uobject);
 	return ret;
@@ -1402,6 +1409,8 @@ ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file,
 	if (ret)
 		return ret;
 
+	atomic_dec(&obj->uxrcd->refcnt);
+
 	idr_remove_uobj(&ib_uverbs_qp_idr, uobj);
 
 	mutex_lock(&file->mutex);
@@ -2032,6 +2041,7 @@ ssize_t ib_uverbs_create_srq(struct ib_uverbs_file *file,
 	atomic_set(&srq->usecnt, 0);
 
 	obj->uevent.uobject.object = srq;
+	obj->uxrcd = NULL;
 	ret = idr_add_uobj(&ib_uverbs_srq_idr, &obj->uevent.uobject);
 	if (ret)
 		goto err_destroy;
@@ -2085,6 +2095,7 @@ ssize_t ib_uverbs_create_xrc_srq(struct ib_uverbs_file *file,
 	struct ib_srq			*srq;
 	struct ib_cq			*xrc_cq;
 	struct ib_xrcd			*xrcd;
+	struct ib_uobject		*xrcd_uobj;
 	struct ib_srq_init_attr		 attr;
 	int ret;
 
@@ -2117,7 +2128,7 @@ ssize_t ib_uverbs_create_xrc_srq(struct ib_uverbs_file *file,
 		goto err_put_pd;
 	}
 
-	xrcd  = idr_read_xrcd(cmd.xrcd_handle, file->ucontext);
+	xrcd  = idr_read_xrcd(cmd.xrcd_handle, file->ucontext, &xrcd_uobj);
 	if (!xrcd) {
 		ret = -EINVAL;
 		goto err_put_cq;
@@ -2130,6 +2141,8 @@ ssize_t ib_uverbs_create_xrc_srq(struct ib_uverbs_file *file,
 	attr.attr.srq_limit = cmd.srq_limit;
 
 	obj->uevent.events_reported = 0;
+	obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object, uobject);
+	atomic_inc(&obj->uxrcd->refcnt);
 	INIT_LIST_HEAD(&obj->uevent.event_list);
 
 	srq = pd->device->create_xrc_srq(pd, xrc_cq, xrcd, &attr, &udata);
@@ -2167,7 +2180,7 @@ ssize_t ib_uverbs_create_xrc_srq(struct ib_uverbs_file *file,
 		goto err_copy;
 	}
 
-	put_xrcd_read(xrcd);
+	put_uobj_read(xrcd_uobj);
 	put_cq_read(xrc_cq);
 	put_pd_read(pd);
 
@@ -2188,7 +2201,8 @@ err_destroy:
 	ib_destroy_srq(srq);
 
 err_put:
-	put_xrcd_read(xrcd);
+	atomic_dec(&obj->uxrcd->refcnt);
+	put_uobj_read(xrcd_uobj);
 
 err_put_cq:
 	put_cq_read(xrc_cq);
@@ -2300,6 +2314,8 @@ ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file,
 	if (ret)
 		return ret;
 
+	atomic_dec(&obj->uxrcd->refcnt);
+
 	idr_remove_uobj(&ib_uverbs_srq_idr, uobj);
 
 	mutex_lock(&file->mutex);
@@ -2320,6 +2336,93 @@ ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file,
 	return ret ? ret : in_len;
 }
 
+struct xrcd_table_entry {
+	struct rb_node	node;
+	struct ib_xrcd *xrcd;
+	struct inode   *inode;
+};
+
+static int xrcd_table_insert(struct ib_uverbs_device *dev,
+			     struct inode *inode,
+			     struct ib_xrcd *xrcd)
+{
+	struct xrcd_table_entry *entry, *scan;
+	struct rb_node **p = &dev->xrcd_tree.rb_node;
+	struct rb_node *parent = NULL;
+
+	entry = kmalloc(sizeof *entry, GFP_KERNEL);
+	if (!entry)
+		return -ENOMEM;
+
+	entry->xrcd  = xrcd;
+	entry->inode = inode;
+
+	while (*p) {
+		parent = *p;
+		scan = rb_entry(parent, struct xrcd_table_entry, node);
+
+		if (inode < scan->inode)
+			p = &(*p)->rb_left;
+		else if (inode > scan->inode)
+			p = &(*p)->rb_right;
+		else {
+			kfree(entry);
+			return -EEXIST;
+		}
+	}
+
+	rb_link_node(&entry->node, parent, p);
+	rb_insert_color(&entry->node, &dev->xrcd_tree);
+
+	igrab(inode);
+
+	return 0;
+}
+
+static struct xrcd_table_entry *xrcd_table_search(struct ib_uverbs_device *dev,
+						  struct inode *inode)
+{
+	struct xrcd_table_entry *entry;
+	struct rb_node *p = dev->xrcd_tree.rb_node;
+
+	while (p) {
+		entry = rb_entry(p, struct xrcd_table_entry, node);
+
+		if (inode < entry->inode)
+			p = p->rb_left;
+		else if (inode > entry->inode)
+			p = p->rb_right;
+		else
+			return entry;
+	}
+
+	return NULL;
+}
+
+static struct ib_xrcd *find_xrcd(struct ib_uverbs_device *dev, struct inode *inode)
+{
+	struct xrcd_table_entry *entry;
+
+	entry = xrcd_table_search(dev, inode);
+	if (!entry)
+		return NULL;
+
+	return entry->xrcd;
+}
+
+
+static void xrcd_table_delete(struct ib_uverbs_device *dev,
+			      struct inode *inode)
+{
+	struct xrcd_table_entry *entry = xrcd_table_search(dev, inode);
+
+	if (entry) {
+		iput(inode);
+		rb_erase(&entry->node, &dev->xrcd_tree);
+		kfree(entry);
+	}
+}
+
 ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file,
 			    const char __user *buf, int in_len,
 			    int out_len)
@@ -2328,8 +2431,11 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file,
 	struct ib_uverbs_open_xrcd_resp	resp;
 	struct ib_udata			udata;
 	struct ib_uxrcd_object	       *obj;
-	struct ib_xrcd		       *xrcd;
-	int ret;
+	struct ib_xrcd		       *xrcd = NULL;
+	struct file		       *f = NULL;
+	struct inode		       *inode = NULL;
+	int				ret = 0;
+	int				new_xrcd = 0;
 
 	if (out_len < sizeof resp)
 		return -ENOSPC;
@@ -2337,32 +2443,64 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file,
 	if (copy_from_user(&cmd, buf, sizeof cmd))
 		return -EFAULT;
 
-	/* file descriptors/inodes not yet implemented */
-	if (cmd.fd != -1)
-		return -ENOSYS;
-
 	INIT_UDATA(&udata, buf + sizeof cmd,
 		   (unsigned long) cmd.response + sizeof resp,
 		   in_len - sizeof cmd, out_len - sizeof resp);
 
+	mutex_lock(&file->device->xrcd_tree_mutex);
+
+	if (cmd.fd != -1) {
+		/* search for file descriptor */
+		f = fget(cmd.fd);
+		if (!f) {
+			ret = -EBADF;
+			goto err_tree_mutex_unlock;
+		}
+
+		inode = f->f_dentry->d_inode;
+		if (!inode) {
+			ret = -EBADF;
+			goto err_tree_mutex_unlock;
+		}
+
+		xrcd = find_xrcd(file->device, inode);
+		if (!xrcd && !(cmd.oflags & O_CREAT)) {
+			/* no XRCD for this inode yet; need O_CREAT flag */
+			ret = -EAGAIN;
+			goto err_tree_mutex_unlock;
+		}
+
+		if (xrcd && cmd.oflags & O_EXCL) {
+			ret = -EINVAL;
+			goto err_tree_mutex_unlock;
+		}
+	}
+
 	obj = kmalloc(sizeof *obj, GFP_KERNEL);
-	if (!obj)
-		return -ENOMEM;
+	if (!obj) {
+		ret = -ENOMEM;
+		goto err_tree_mutex_unlock;
+	}
 
 	init_uobj(&obj->uobject, 0, file->ucontext, &xrcd_lock_key);
+
 	down_write(&obj->uobject.mutex);
 
-	xrcd = file->device->ib_dev->alloc_xrcd(file->device->ib_dev,
-						file->ucontext, &udata);
-	if (IS_ERR(xrcd)) {
-		ret = PTR_ERR(xrcd);
-		goto err;
-	}
+	if (!xrcd) {
+		xrcd = file->device->ib_dev->alloc_xrcd(file->device->ib_dev,
+							file->ucontext, &udata);
+		if (IS_ERR(xrcd)) {
+			ret = PTR_ERR(xrcd);
+			goto err;
+		}
 
-	xrcd->uobject = &obj->uobject;
-	xrcd->device  = file->device->ib_dev;
-	atomic_set(&xrcd->usecnt, 0);
+		xrcd->inode   = inode;
+		xrcd->device  = file->device->ib_dev;
+		atomic_set(&xrcd->usecnt, 0);
+		new_xrcd = 1;
+	}
 
+	atomic_set(&obj->refcnt, 0);
 	obj->uobject.object = xrcd;
 	ret = idr_add_uobj(&ib_uverbs_xrcd_idr, &obj->uobject);
 	if (ret)
@@ -2371,12 +2509,25 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file,
 	memset(&resp, 0, sizeof resp);
 	resp.xrcd_handle = obj->uobject.id;
 
+	if (inode) {
+		if (new_xrcd) {
+			/* create new inode/xrcd table entry */
+			ret = xrcd_table_insert(file->device, inode, xrcd);
+			if (ret)
+				goto err_insert_xrcd;
+		}
+		atomic_inc(&xrcd->usecnt);
+	}
+
 	if (copy_to_user((void __user *) (unsigned long) cmd.response,
 			 &resp, sizeof resp)) {
 		ret = -EFAULT;
 		goto err_copy;
 	}
 
+	if (f)
+		fput(f);
+
 	mutex_lock(&file->mutex);
 	list_add_tail(&obj->uobject.list, &file->ucontext->xrcd_list);
 	mutex_unlock(&file->mutex);
@@ -2385,9 +2536,17 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file,
 
 	up_write(&obj->uobject.mutex);
 
+	mutex_unlock(&file->device->xrcd_tree_mutex);
 	return in_len;
 
 err_copy:
+	if (inode) {
+		if (new_xrcd)
+			xrcd_table_delete(file->device, inode);
+		atomic_dec(&xrcd->usecnt);
+	}
+
+err_insert_xrcd:
 	idr_remove_uobj(&ib_uverbs_xrcd_idr, &obj->uobject);
 
 err_idr:
@@ -2395,33 +2554,66 @@ err_idr:
 
 err:
 	put_uobj_write(&obj->uobject);
+
+err_tree_mutex_unlock:
+	if (f)
+		fput(f);
+
+	mutex_unlock(&file->device->xrcd_tree_mutex);
+
 	return ret;
 }
 
 ssize_t ib_uverbs_close_xrcd(struct ib_uverbs_file *file,
-				   const char __user *buf, int in_len,
-				   int out_len)
+			     const char __user *buf, int in_len,
+			     int out_len)
 {
 	struct ib_uverbs_close_xrcd	cmd;
 	struct ib_uobject	       *uobj;
-	int				ret;
+	struct ib_xrcd		       *xrcd = NULL;
+	struct inode		       *inode = NULL;
+	struct ib_uxrcd_object	       *obj;
+	int				live;
+	int				ret = 0;
 
 	if (copy_from_user(&cmd, buf, sizeof cmd))
 		return -EFAULT;
 
+	mutex_lock(&file->device->xrcd_tree_mutex);
 	uobj = idr_write_uobj(&ib_uverbs_xrcd_idr, cmd.xrcd_handle,
 			      file->ucontext);
-	if (!uobj)
-		return -EINVAL;
+	if (!uobj) {
+		ret = -EINVAL;
+		goto out;
+	}
 
-	ret = ib_dealloc_xrcd(uobj->object);
-	if (!ret)
-		uobj->live = 0;
+	xrcd  = uobj->object;
+	inode = xrcd->inode;
+	obj   = container_of(uobj, struct ib_uxrcd_object, uobject);
+	if (atomic_read(&obj->refcnt)) {
+		ret = -EBUSY;
+		put_uobj_write(uobj);
+		goto out;
+	}
+
+	if (!inode || atomic_dec_and_test(&xrcd->usecnt)) {
+		ret = ib_dealloc_xrcd(uobj->object);
+		if (!ret)
+			uobj->live = 0;
+	}
+
+	live = uobj->live;
+
+	if (inode && ret)
+		atomic_inc(&xrcd->usecnt);
 
 	put_uobj_write(uobj);
 
 	if (ret)
-		return ret;
+		goto out;
+
+	if (inode && !live)
+		xrcd_table_delete(file->device, inode);
 
 	idr_remove_uobj(&ib_uverbs_xrcd_idr, uobj);
 
@@ -2431,5 +2623,29 @@ ssize_t ib_uverbs_close_xrcd(struct ib_uverbs_file *file,
 
 	put_uobj(uobj);
 
-	return in_len;
+	ret = in_len;
+
+out:
+	mutex_unlock(&file->device->xrcd_tree_mutex);
+	return ret;
+}
+
+void ib_uverbs_dealloc_xrcd(struct ib_uverbs_device *dev,
+			    struct ib_xrcd *xrcd)
+{
+	struct inode *inode = NULL;
+	int ret = 0;
+
+	inode = xrcd->inode;
+	if (inode && !atomic_dec_and_test(&xrcd->usecnt))
+		return;
+
+	ret = ib_dealloc_xrcd(xrcd);
+
+	if (inode) {
+		if (!ret)
+			xrcd_table_delete(dev, inode);
+		else
+			atomic_inc(&xrcd->usecnt);
+	}
 }
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index 2a97810..2b9d744 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -250,15 +250,17 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
 		kfree(uobj);
 	}
 
+	mutex_lock(&file->device->xrcd_tree_mutex);
 	list_for_each_entry_safe(uobj, tmp, &context->xrcd_list, list) {
 		struct ib_xrcd *xrcd = uobj->object;
 		struct ib_uxrcd_object *uxrcd =
 			container_of(uobj, struct ib_uxrcd_object, uobject);
 
 		idr_remove_uobj(&ib_uverbs_xrcd_idr, uobj);
-		ib_dealloc_xrcd(xrcd);
+		ib_uverbs_dealloc_xrcd(file->device, xrcd);
 		kfree(uxrcd);
 	}
+	mutex_unlock(&file->device->xrcd_tree_mutex);
 
 	list_for_each_entry_safe(uobj, tmp, &context->pd_list, list) {
 		struct ib_pd *pd = uobj->object;
@@ -763,6 +765,8 @@ static void ib_uverbs_add_one(struct ib_device *device)
 
 	kref_init(&uverbs_dev->ref);
 	init_completion(&uverbs_dev->comp);
+	uverbs_dev->xrcd_tree = RB_ROOT;
+	mutex_init(&uverbs_dev->xrcd_tree_mutex);
 
 	spin_lock(&map_lock);
 	uverbs_dev->devnum = find_first_zero_bit(dev_map, IB_UVERBS_MAX_DEVICES);
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index b75193c..99f76b6 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -1014,7 +1014,7 @@ struct ib_xrcd *ib_alloc_xrcd(struct ib_device *device)
 	xrcd = device->alloc_xrcd(device, NULL, NULL);
 	if (!IS_ERR(xrcd)) {
 		xrcd->device  = device;
-		xrcd->uobject = NULL;
+		xrcd->inode   = NULL;
 		atomic_set(&xrcd->usecnt, 0);
 	}
 
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 1d843c3..322d145 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -848,7 +848,7 @@ struct ib_pd {
 
 struct ib_xrcd {
 	struct ib_device       *device;
-	struct ib_uobject      *uobject;
+	struct inode	       *inode;
 	atomic_t		usecnt; /* count all resources */
 };
 
-- 
1.6.6.1

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* RE: [PATCH 0/4] Current XRC queue
       [not found] ` <1264446094-4460-1-git-send-email-rolandd-FYB4Gu1CFyUAvxtiuMwx3w@public.gmane.org>
                     ` (3 preceding siblings ...)
  2010-01-25 19:01   ` [PATCH 4/4] IB/uverbs: Support for associating XRC domains to inodes Roland Dreier
@ 2010-01-26 11:16   ` Tziporet Koren
  4 siblings, 0 replies; 15+ messages in thread
From: Tziporet Koren @ 2010-01-26 11:16 UTC (permalink / raw)
  To: Roland Dreier, Jack Morgenstein; +Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA

> I finally found time to finish going over the base XRC patches.  I
> fixed
> the "userspace can destroy an XRCD handle while still having QPs/SRQs
> referring to it" problem with a reference count, rather than the more
> complicated method used by the original patches.
> 
> I think the next step would be for someone (probably not me if we want
> this to happen soon) to port the rest of the mlx4 XRC patch set on top
> of this, ideally trying to factor out any reference-counting-ish code
> away from the device-specific driver and into common code.  Then we
> should finally be able to merge this and move on to the IBoE changes.

Thanks, Roland.
Jack from Mellanox will take it from here.

Tziporet

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 2/4] IB/uverbs: Support for XRC
       [not found]     ` <1264446094-4460-3-git-send-email-rolandd-FYB4Gu1CFyUAvxtiuMwx3w@public.gmane.org>
@ 2010-01-26 14:39       ` Or Gerlitz
       [not found]         ` <4B5EFEAD.9040702-hKgKHo2Ms0FWk0Htik3J/w@public.gmane.org>
  0 siblings, 1 reply; 15+ messages in thread
From: Or Gerlitz @ 2010-01-26 14:39 UTC (permalink / raw)
  To: Roland Dreier, Jack Morgenstein; +Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA

Roland Dreier wrote:
> Add support for core userspace XRC operations (alloc/dealloc XRC domain,
> create XRC SRQ), including adding an ABI for marshalling requests and responses.
> +++ b/include/rdma/ib_user_verbs.h
> @@ -81,7 +81,10 @@ enum {
> -	IB_USER_VERBS_CMD_POST_SRQ_RECV
> +	IB_USER_VERBS_CMD_POST_SRQ_RECV,
> +	IB_USER_VERBS_CMD_CREATE_XRC_SRQ,
> +	IB_USER_VERBS_CMD_OPEN_XRCD,
> +	IB_USER_VERBS_CMD_CLOSE_XRCD

Jack, Roland, I believe that adding entries to this enum means an ABI change towards libibverbs, or will eventually cause an ABI change between libibverbs and its consumers (e.g. a change in the size/content of the verbs context structure, etc.), correct?

I'd like to use this ABI change to also introduce/expose the ib_modify_cq verb to user space. The related patches should be quite simple (uverbs, libibverbs, libmlx4); still, I'd be glad if you could tell me which code base I should do that against.

One more thing I'd like to add to libibverbs and some device driver libraries is send-with-invalidate, but this is only a user space patch set without any kernel/user or lib/app ABI change, correct?

Or.


--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 2/4] IB/uverbs: Support for XRC
       [not found]         ` <4B5EFEAD.9040702-hKgKHo2Ms0FWk0Htik3J/w@public.gmane.org>
@ 2010-01-26 23:51           ` Roland Dreier
       [not found]             ` <aday6jko26n.fsf-BjVyx320WGW9gfZ95n9DRSW4+XlvGpQz@public.gmane.org>
  0 siblings, 1 reply; 15+ messages in thread
From: Roland Dreier @ 2010-01-26 23:51 UTC (permalink / raw)
  To: Or Gerlitz
  Cc: Roland Dreier, Jack Morgenstein, linux-rdma-u79uwXL29TY76Z2rM5mHXA


 > > -	IB_USER_VERBS_CMD_POST_SRQ_RECV
 > > +	IB_USER_VERBS_CMD_POST_SRQ_RECV,
 > > +	IB_USER_VERBS_CMD_CREATE_XRC_SRQ,
 > > +	IB_USER_VERBS_CMD_OPEN_XRCD,
 > > +	IB_USER_VERBS_CMD_CLOSE_XRCD
 > 
 > Jack,Roland I believe that adding entries to this enum means ABI change towards libibverbs or eventually cause ABI change between libibverbs and its consumers, (e.g change in size/content of the verbs context structure, etc), correct? 

This is a backwards-compatible ABI extension -- we are adding new
commands.  And I believe OFED has been shipping this ABI for quite a
while now (since 1.4?).

 > I'd like to use this ABI change and introduce/expose also the ib_modify_cq verb to user space. Basically, the related patches should be quite simple (uverbs, libibverbs, libmlx4), still, I'll be glad if you tell/coach me against what code base to do that. 

OK, but we should coordinate this with all the other ABI extensions that
OFED has already made.

 > One more thing I'd like to add to libibverbs and some device driver libraries is send-with-invalidate, but this is only user space patch set without any kernel/user, lib/app ABI change, correct?

It is a library ABI change of course to add the send with invalidate
opcode.  But that is OK.

 - R.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 2/4] IB/uverbs: Support for XRC
       [not found]             ` <aday6jko26n.fsf-BjVyx320WGW9gfZ95n9DRSW4+XlvGpQz@public.gmane.org>
@ 2010-01-27  9:13               ` Or Gerlitz
       [not found]                 ` <4B6003B2.4040101-smomgflXvOZWk0Htik3J/w@public.gmane.org>
  0 siblings, 1 reply; 15+ messages in thread
From: Or Gerlitz @ 2010-01-27  9:13 UTC (permalink / raw)
  To: Roland Dreier, Jack Morgenstein
  Cc: Roland Dreier, linux-rdma-u79uwXL29TY76Z2rM5mHXA

Roland Dreier wrote:
> OK, but we should coordinate this with all the other ABI extensions that OFED has already made
I believe the only thing I need to coordinate with is the XRC patch set.

Jack, can you please direct me to the patch set I should be basing the 
modify_cq patches on? No problem if this is going to take some 
time, but I do want to push cq_modify in the same merge window as XRC; 
I assume we have at least a couple of weeks for this to happen.

Or.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 2/4] IB/uverbs: Support for XRC
       [not found]                 ` <4B6003B2.4040101-smomgflXvOZWk0Htik3J/w@public.gmane.org>
@ 2010-01-27 10:55                   ` Jack Morgenstein
  2010-01-27 16:00                   ` Roland Dreier
  1 sibling, 0 replies; 15+ messages in thread
From: Jack Morgenstein @ 2010-01-27 10:55 UTC (permalink / raw)
  To: Or Gerlitz; +Cc: Roland Dreier, linux-rdma-u79uwXL29TY76Z2rM5mHXA

On Wednesday 27 January 2010 11:13, Or Gerlitz wrote:
> Roland Dreier wrote:
> > OK, but we should coordinate this with all the other ABI extensions that OFED has already made
> I believe the only thing I need to coordinate with is the XRC patch set.
> 
> Jack, can you please direct me to the patch set I should be setting the 
> modify_cq patches against? no problem if this is going to take some 
> time, but I do want to push cq_modify in the same merge window with XRC, 
> I assume we have at least couple of weeks for this to happen.
> 
> Or.

I'll be getting into XRC tomorrow. Just finishing up some stuff.
The patch set will change, pursuant to Roland's changes.

-Jack
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 2/4] IB/uverbs: Support for XRC
       [not found]                 ` <4B6003B2.4040101-smomgflXvOZWk0Htik3J/w@public.gmane.org>
  2010-01-27 10:55                   ` Jack Morgenstein
@ 2010-01-27 16:00                   ` Roland Dreier
       [not found]                     ` <ada8wbjo7v3.fsf-BjVyx320WGW9gfZ95n9DRSW4+XlvGpQz@public.gmane.org>
  1 sibling, 1 reply; 15+ messages in thread
From: Roland Dreier @ 2010-01-27 16:00 UTC (permalink / raw)
  To: Or Gerlitz
  Cc: Jack Morgenstein, Roland Dreier, linux-rdma-u79uwXL29TY76Z2rM5mHXA


 > > OK, but we should coordinate this with all the other ABI extensions
 > > that OFED has already made

 > I believe the only thing I need to coordinate with is the XRC patch
 > set.

I think the IBoE patches in the OFED branch also extend the ABI.  We
should try to avoid stepping on that too.

 - R.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 2/4] IB/uverbs: Support for XRC
       [not found]                     ` <ada8wbjo7v3.fsf-BjVyx320WGW9gfZ95n9DRSW4+XlvGpQz@public.gmane.org>
@ 2010-01-28  7:53                       ` Or Gerlitz
  0 siblings, 0 replies; 15+ messages in thread
From: Or Gerlitz @ 2010-01-28  7:53 UTC (permalink / raw)
  To: Roland Dreier, Eli Cohen
  Cc: Jack Morgenstein, Roland Dreier, linux-rdma-u79uwXL29TY76Z2rM5mHXA

Roland Dreier wrote:
>  > I believe the only thing I need to coordinate with is the XRC patch set.
>
> I think the IBoE patches in the OFED branch also extend the ABI. We should try to avoid stepping on that too.
I think the only ABI change introduced by the IBoE branch (apart from the 
link type field in the port attr) is the get_mac verb. As I wrote to 
Eli at http://marc.info/?l=linux-rdma&m=126380119407472, in my opinion 
this verb isn't needed at all; so far he hasn't replied.

Or.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 4/4] IB/uverbs: Support for associating XRC domains to inodes
       [not found]     ` <1264446094-4460-5-git-send-email-rolandd-FYB4Gu1CFyUAvxtiuMwx3w@public.gmane.org>
@ 2010-02-01 13:52       ` Jack Morgenstein
       [not found]         ` <201002011552.24904.jackm-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>
  0 siblings, 1 reply; 15+ messages in thread
From: Jack Morgenstein @ 2010-02-01 13:52 UTC (permalink / raw)
  To: Roland Dreier; +Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA, Tziporet Koren

On Monday 25 January 2010 21:01, Roland Dreier wrote:
> From: Jack Morgenstein <jackm-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>
> 
> Add support for associating an XRC domain to an inode so that XRC
> domains can be shared between processes.  We keep a per-device RB tree
> of XRCDs (indexed by inode) in the uverbs module, and use struct
> ib_xrcd's usecnt member to reference count XRCDs so that an XRCD is
> not freed until the last process with a reference is done with it.
> 
> Signed-off-by: Jack Morgenstein <jackm-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>
> Signed-off-by: Roland Dreier <rolandd-FYB4Gu1CFyUAvxtiuMwx3w@public.gmane.org>

Feedback on the patch set.
I think I have uncovered a couple of bugs in my review (not surprising, since you
most likely could not try out the code).

1. the xrcd refcnt is incremented when creating an xrc qp and xrc srq, but is unconditionally
   decremented in destroy_qp/destroy_srq whether we have an xrcd or not.

2. In closing/deallocating xrc domains, it is not an error if ib_dealloc_xrcd returns "busy".
   This simply means that there are other user processes still using that xrc domain.
   The current process still needs to clean up its uobject.  In particular, the usecnt should
   not be re-incremented on the busy return. (If it gets re-incremented on busy, no one can ever
   succeed in closing an xrc domain if more than one process is using it).

The following patch fixes the above issues I think. (The patch below assumes that
the only error value returned by ib_dealloc_xrcd is "-EBUSY" -- as is the case with pd's.
The mlx4 driver always returns 0 for the deallocate functions).

Please review this and send me your feedback.

Thanks!
-Jack
=========================================================================================
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 3db78cb..98c4ded 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -1382,6 +1382,7 @@ ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file,
 	struct ib_uobject		*uobj;
 	struct ib_qp               	*qp;
 	struct ib_uqp_object        	*obj;
+	struct ib_xrcd			 *xrcd;
 	int                        	 ret = -EINVAL;
 
 	if (copy_from_user(&cmd, buf, sizeof cmd))
@@ -1400,6 +1401,7 @@ ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file,
 		return -EBUSY;
 	}
 
+	xrcd = qp->xrcd;
 	ret = ib_destroy_qp(qp);
 	if (!ret)
 		uobj->live = 0;
@@ -1409,7 +1411,8 @@ ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file,
 	if (ret)
 		return ret;
 
-	atomic_dec(&obj->uxrcd->refcnt);
+	if (xrcd)
+		atomic_dec(&obj->uxrcd->refcnt);
 
 	idr_remove_uobj(&ib_uverbs_qp_idr, uobj);
 
@@ -2294,6 +2297,7 @@ ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file,
 	struct ib_uobject		 *uobj;
 	struct ib_srq               	 *srq;
 	struct ib_usrq_object		 *obj;
+	struct ib_xrcd			 *xrcd;
 	int                         	  ret = -EINVAL;
 
 	if (copy_from_user(&cmd, buf, sizeof cmd))
@@ -2305,6 +2309,7 @@ ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file,
 	srq = uobj->object;
 	obj = container_of(uobj, struct ib_usrq_object, uevent.uobject);
 
+	xrcd = srq->xrcd;
 	ret = ib_destroy_srq(srq);
 	if (!ret)
 		uobj->live = 0;
@@ -2314,7 +2319,8 @@ ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file,
 	if (ret)
 		return ret;
 
-	atomic_dec(&obj->uxrcd->refcnt);
+	if (xrcd)
+		atomic_dec(&obj->uxrcd->refcnt);
 
 	idr_remove_uobj(&ib_uverbs_srq_idr, uobj);
 
@@ -2604,12 +2610,9 @@ ssize_t ib_uverbs_close_xrcd(struct ib_uverbs_file *file,
 
 	live = uobj->live;
 
-	if (inode && ret)
-		atomic_inc(&xrcd->usecnt);
-
 	put_uobj_write(uobj);
 
-	if (ret)
+	if (ret && !inode)
 		goto out;
 
 	if (inode && !live)
@@ -2637,15 +2640,10 @@ void ib_uverbs_dealloc_xrcd(struct ib_uverbs_device *dev,
 	int ret = 0;
 
 	inode = xrcd->inode;
-	if (inode && !atomic_dec_and_test(&xrcd->usecnt))
-		return;
+	if (inode)
+		atomic_dec(&xrcd->usecnt);
 
 	ret = ib_dealloc_xrcd(xrcd);
-
-	if (inode) {
-		if (!ret)
-			xrcd_table_delete(dev, inode);
-		else
-			atomic_inc(&xrcd->usecnt);
-	}
+	if (!ret && inode)
+		xrcd_table_delete(dev, inode);
 }
=======================================================================
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* Re: [PATCH 4/4] IB/uverbs: Support for associating XRC domains to inodes
       [not found]         ` <201002011552.24904.jackm-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>
@ 2010-02-03 21:35           ` Roland Dreier
       [not found]             ` <adaeil2gfy9.fsf-BjVyx320WGW9gfZ95n9DRSW4+XlvGpQz@public.gmane.org>
  0 siblings, 1 reply; 15+ messages in thread
From: Roland Dreier @ 2010-02-03 21:35 UTC (permalink / raw)
  To: Jack Morgenstein
  Cc: Roland Dreier, linux-rdma-u79uwXL29TY76Z2rM5mHXA, Tziporet Koren

 > I think I have uncovered a couple of bugs in my review (not surprising, since you
 > most likely could not try out the code).

Thanks.  Yes, this was all compile-tested only.

 > 1. the xrcd refcnt is incremented when creating an xrc qp and xrc srq, but is unconditionally
 >    decremented in destroy_qp/destroy_srq whether we have an xrcd or not.

This was all messed up... the error path of create_qp was bogus too.
I think the updated patch below is good though.

 > 2. In closing/deallocating xrc domains, it is not an error if ib_dealloc_xrcd returns "busy".
 >    This simply means that there are other user processes still using that xrc domain.
 >    The current process still needs to clean up its uobject.  In particular, the usecnt should
 >    not be re-incremented on the busy return. (If it gets re-incremented on busy, no one can ever
 >    succeed in closing an xrc domain if more that one process is using it).

I do agree we shouldn't reincrement the use count if the dealloc
fails -- in ib_uverbs_dealloc_xrcd we're in the path of closing a
context and so there's no real way to recover if the ib_dealloc_xrcd
fails.  But I don't think the busy case is an issue -- we should return
without trying to dealloc if the xrcd is held by another process, since
the atomic_dec_and_test will return a non-zero value in that case I
think.  So the simplification in the patch below should be OK, right?
---
 drivers/infiniband/core/uverbs.h      |    9 +-
 drivers/infiniband/core/uverbs_cmd.c  |  294 ++++++++++++++++++++++++++++-----
 drivers/infiniband/core/uverbs_main.c |    6 +-
 drivers/infiniband/core/verbs.c       |    2 +-
 include/rdma/ib_verbs.h               |    2 +-
 5 files changed, 268 insertions(+), 45 deletions(-)

diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h
index 9180acd..e873437 100644
--- a/drivers/infiniband/core/uverbs.h
+++ b/drivers/infiniband/core/uverbs.h
@@ -70,10 +70,12 @@
 struct ib_uverbs_device {
 	struct kref				ref;
 	struct completion			comp;
-	int					devnum;
 	struct cdev			       *cdev;
 	struct device			       *dev;
 	struct ib_device		       *ib_dev;
+	struct rb_root				xrcd_tree;
+	struct mutex				xrcd_tree_mutex;
+	int					devnum;
 	int					num_comp_vectors;
 };
 
@@ -121,15 +123,18 @@ struct ib_uevent_object {
 
 struct ib_uxrcd_object {
 	struct ib_uobject	uobject;
+	atomic_t		refcnt;
 };
 
 struct ib_usrq_object {
 	struct ib_uevent_object	uevent;
+	struct ib_uxrcd_object *uxrcd;
 };
 
 struct ib_uqp_object {
 	struct ib_uevent_object	uevent;
 	struct list_head 	mcast_list;
+	struct ib_uxrcd_object *uxrcd;
 };
 
 struct ib_ucq_object {
@@ -169,6 +174,8 @@ void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr);
 void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr);
 void ib_uverbs_event_handler(struct ib_event_handler *handler,
 			     struct ib_event *event);
+void ib_uverbs_dealloc_xrcd(struct ib_uverbs_device *dev,
+			    struct ib_xrcd *xrcd);
 
 #define IB_UVERBS_DECLARE_CMD(name)					\
 	ssize_t ib_uverbs_##name(struct ib_uverbs_file *file,		\
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index b209339..cd4c692 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -256,14 +256,11 @@ static void put_srq_read(struct ib_srq *srq)
 }
 
 static struct ib_xrcd *idr_read_xrcd(int xrcd_handle,
-				     struct ib_ucontext *context)
+				     struct ib_ucontext *context,
+				     struct ib_uobject **uobj)
 {
-	return idr_read_obj(&ib_uverbs_xrcd_idr, xrcd_handle, context, 0);
-}
-
-static void put_xrcd_read(struct ib_xrcd *xrcd)
-{
-	put_uobj_read(xrcd->uobject);
+	*uobj = idr_read_uobj(&ib_uverbs_xrcd_idr, xrcd_handle, context, 0);
+	return *uobj ? (*uobj)->object : NULL;
 }
 
 ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
@@ -1040,6 +1037,7 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
 	struct ib_qp                   *qp;
 	struct ib_qp_init_attr          attr;
 	struct ib_xrcd		       *xrcd;
+	struct ib_uobject	       *uninitialized_var(xrcd_uobj);
 	int ret;
 
 	if (out_len < sizeof resp)
@@ -1062,12 +1060,18 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
 	srq = (cmd.is_srq && cmd.qp_type != IB_QPT_XRC) ?
 		idr_read_srq(cmd.srq_handle, file->ucontext) : NULL;
 	xrcd = cmd.qp_type == IB_QPT_XRC ?
-		idr_read_xrcd(cmd.srq_handle, file->ucontext) : NULL;
+		idr_read_xrcd(cmd.srq_handle, file->ucontext, &xrcd_uobj) : NULL;
 	pd  = idr_read_pd(cmd.pd_handle, file->ucontext);
 	scq = idr_read_cq(cmd.send_cq_handle, file->ucontext, 0);
 	rcq = cmd.recv_cq_handle == cmd.send_cq_handle ?
 		scq : idr_read_cq(cmd.recv_cq_handle, file->ucontext, 1);
 
+	if (xrcd) {
+		obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object, uobject);
+		atomic_inc(&obj->uxrcd->refcnt);
+	} else
+		obj->uxrcd = NULL;
+
 	if (!pd || !scq || !rcq || (cmd.is_srq && !srq) ||
 	    (cmd.qp_type == IB_QPT_XRC && !xrcd)) {
 		ret = -EINVAL;
@@ -1145,7 +1149,7 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
 	if (srq)
 		put_srq_read(srq);
 	if (xrcd)
-		put_xrcd_read(xrcd);
+		put_uobj_read(xrcd_uobj);
 
 	mutex_lock(&file->mutex);
 	list_add_tail(&obj->uevent.uobject.list, &file->ucontext->qp_list);
@@ -1172,8 +1176,10 @@ err_put:
 		put_cq_read(rcq);
 	if (srq)
 		put_srq_read(srq);
-	if (xrcd)
-		put_xrcd_read(xrcd);
+	if (xrcd) {
+		atomic_dec(&obj->uxrcd->refcnt);
+		put_uobj_read(xrcd_uobj);
+	}
 
 	put_uobj_write(&obj->uevent.uobject);
 	return ret;
@@ -1402,6 +1408,9 @@ ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file,
 	if (ret)
 		return ret;
 
+	if (obj->uxrcd)
+		atomic_dec(&obj->uxrcd->refcnt);
+
 	idr_remove_uobj(&ib_uverbs_qp_idr, uobj);
 
 	mutex_lock(&file->mutex);
@@ -2032,6 +2041,7 @@ ssize_t ib_uverbs_create_srq(struct ib_uverbs_file *file,
 	atomic_set(&srq->usecnt, 0);
 
 	obj->uevent.uobject.object = srq;
+	obj->uxrcd = NULL;
 	ret = idr_add_uobj(&ib_uverbs_srq_idr, &obj->uevent.uobject);
 	if (ret)
 		goto err_destroy;
@@ -2085,6 +2095,7 @@ ssize_t ib_uverbs_create_xrc_srq(struct ib_uverbs_file *file,
 	struct ib_srq			*srq;
 	struct ib_cq			*xrc_cq;
 	struct ib_xrcd			*xrcd;
+	struct ib_uobject		*xrcd_uobj;
 	struct ib_srq_init_attr		 attr;
 	int ret;
 
@@ -2117,7 +2128,7 @@ ssize_t ib_uverbs_create_xrc_srq(struct ib_uverbs_file *file,
 		goto err_put_pd;
 	}
 
-	xrcd  = idr_read_xrcd(cmd.xrcd_handle, file->ucontext);
+	xrcd  = idr_read_xrcd(cmd.xrcd_handle, file->ucontext, &xrcd_uobj);
 	if (!xrcd) {
 		ret = -EINVAL;
 		goto err_put_cq;
@@ -2130,6 +2141,8 @@ ssize_t ib_uverbs_create_xrc_srq(struct ib_uverbs_file *file,
 	attr.attr.srq_limit = cmd.srq_limit;
 
 	obj->uevent.events_reported = 0;
+	obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object, uobject);
+	atomic_inc(&obj->uxrcd->refcnt);
 	INIT_LIST_HEAD(&obj->uevent.event_list);
 
 	srq = pd->device->create_xrc_srq(pd, xrc_cq, xrcd, &attr, &udata);
@@ -2167,7 +2180,7 @@ ssize_t ib_uverbs_create_xrc_srq(struct ib_uverbs_file *file,
 		goto err_copy;
 	}
 
-	put_xrcd_read(xrcd);
+	put_uobj_read(xrcd_uobj);
 	put_cq_read(xrc_cq);
 	put_pd_read(pd);
 
@@ -2188,7 +2201,8 @@ err_destroy:
 	ib_destroy_srq(srq);
 
 err_put:
-	put_xrcd_read(xrcd);
+	atomic_dec(&obj->uxrcd->refcnt);
+	put_uobj_read(xrcd_uobj);
 
 err_put_cq:
 	put_cq_read(xrc_cq);
@@ -2300,6 +2314,9 @@ ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file,
 	if (ret)
 		return ret;
 
+	if (obj->uxrcd)
+		atomic_dec(&obj->uxrcd->refcnt);
+
 	idr_remove_uobj(&ib_uverbs_srq_idr, uobj);
 
 	mutex_lock(&file->mutex);
@@ -2320,6 +2337,93 @@ ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file,
 	return ret ? ret : in_len;
 }
 
+struct xrcd_table_entry {
+	struct rb_node	node;
+	struct ib_xrcd *xrcd;
+	struct inode   *inode;
+};
+
+static int xrcd_table_insert(struct ib_uverbs_device *dev,
+			     struct inode *inode,
+			     struct ib_xrcd *xrcd)
+{
+	struct xrcd_table_entry *entry, *scan;
+	struct rb_node **p = &dev->xrcd_tree.rb_node;
+	struct rb_node *parent = NULL;
+
+	entry = kmalloc(sizeof *entry, GFP_KERNEL);
+	if (!entry)
+		return -ENOMEM;
+
+	entry->xrcd  = xrcd;
+	entry->inode = inode;
+
+	while (*p) {
+		parent = *p;
+		scan = rb_entry(parent, struct xrcd_table_entry, node);
+
+		if (inode < scan->inode)
+			p = &(*p)->rb_left;
+		else if (inode > scan->inode)
+			p = &(*p)->rb_right;
+		else {
+			kfree(entry);
+			return -EEXIST;
+		}
+	}
+
+	rb_link_node(&entry->node, parent, p);
+	rb_insert_color(&entry->node, &dev->xrcd_tree);
+
+	igrab(inode);
+
+	return 0;
+}
+
+static struct xrcd_table_entry *xrcd_table_search(struct ib_uverbs_device *dev,
+						  struct inode *inode)
+{
+	struct xrcd_table_entry *entry;
+	struct rb_node *p = dev->xrcd_tree.rb_node;
+
+	while (p) {
+		entry = rb_entry(p, struct xrcd_table_entry, node);
+
+		if (inode < entry->inode)
+			p = p->rb_left;
+		else if (inode > entry->inode)
+			p = p->rb_right;
+		else
+			return entry;
+	}
+
+	return NULL;
+}
+
+static struct ib_xrcd *find_xrcd(struct ib_uverbs_device *dev, struct inode *inode)
+{
+	struct xrcd_table_entry *entry;
+
+	entry = xrcd_table_search(dev, inode);
+	if (!entry)
+		return NULL;
+
+	return entry->xrcd;
+}
+
+
+static void xrcd_table_delete(struct ib_uverbs_device *dev,
+			      struct inode *inode)
+{
+	struct xrcd_table_entry *entry = xrcd_table_search(dev, inode);
+
+	if (entry) {
+		iput(inode);
+		rb_erase(&entry->node, &dev->xrcd_tree);
+		kfree(entry);
+	}
+}
+
 ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file,
 			    const char __user *buf, int in_len,
 			    int out_len)
@@ -2328,8 +2432,11 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file,
 	struct ib_uverbs_open_xrcd_resp	resp;
 	struct ib_udata			udata;
 	struct ib_uxrcd_object	       *obj;
-	struct ib_xrcd		       *xrcd;
-	int ret;
+	struct ib_xrcd		       *xrcd = NULL;
+	struct file		       *f = NULL;
+	struct inode		       *inode = NULL;
+	int				ret = 0;
+	int				new_xrcd = 0;
 
 	if (out_len < sizeof resp)
 		return -ENOSPC;
@@ -2337,32 +2444,64 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file,
 	if (copy_from_user(&cmd, buf, sizeof cmd))
 		return -EFAULT;
 
-	/* file descriptors/inodes not yet implemented */
-	if (cmd.fd != -1)
-		return -ENOSYS;
-
 	INIT_UDATA(&udata, buf + sizeof cmd,
 		   (unsigned long) cmd.response + sizeof resp,
 		   in_len - sizeof cmd, out_len - sizeof resp);
 
+	mutex_lock(&file->device->xrcd_tree_mutex);
+
+	if (cmd.fd != -1) {
+		/* search for file descriptor */
+		f = fget(cmd.fd);
+		if (!f) {
+			ret = -EBADF;
+			goto err_tree_mutex_unlock;
+		}
+
+		inode = f->f_dentry->d_inode;
+		if (!inode) {
+			ret = -EBADF;
+			goto err_tree_mutex_unlock;
+		}
+
+		xrcd = find_xrcd(file->device, inode);
+		if (!xrcd && !(cmd.oflags & O_CREAT)) {
+			/* no file descriptor. Need CREATE flag */
+			ret = -EAGAIN;
+			goto err_tree_mutex_unlock;
+		}
+
+		if (xrcd && cmd.oflags & O_EXCL) {
+			ret = -EINVAL;
+			goto err_tree_mutex_unlock;
+		}
+	}
+
 	obj = kmalloc(sizeof *obj, GFP_KERNEL);
-	if (!obj)
-		return -ENOMEM;
+	if (!obj) {
+		ret = -ENOMEM;
+		goto err_tree_mutex_unlock;
+	}
 
 	init_uobj(&obj->uobject, 0, file->ucontext, &xrcd_lock_key);
+
 	down_write(&obj->uobject.mutex);
 
-	xrcd = file->device->ib_dev->alloc_xrcd(file->device->ib_dev,
-						file->ucontext, &udata);
-	if (IS_ERR(xrcd)) {
-		ret = PTR_ERR(xrcd);
-		goto err;
-	}
+	if (!xrcd) {
+		xrcd = file->device->ib_dev->alloc_xrcd(file->device->ib_dev,
+							file->ucontext, &udata);
+		if (IS_ERR(xrcd)) {
+			ret = PTR_ERR(xrcd);
+			goto err;
+		}
 
-	xrcd->uobject = &obj->uobject;
-	xrcd->device  = file->device->ib_dev;
-	atomic_set(&xrcd->usecnt, 0);
+		xrcd->inode   = inode;
+		xrcd->device  = file->device->ib_dev;
+		atomic_set(&xrcd->usecnt, 0);
+		new_xrcd = 1;
+	}
 
+	atomic_set(&obj->refcnt, 0);
 	obj->uobject.object = xrcd;
 	ret = idr_add_uobj(&ib_uverbs_xrcd_idr, &obj->uobject);
 	if (ret)
@@ -2371,12 +2510,25 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file,
 	memset(&resp, 0, sizeof resp);
 	resp.xrcd_handle = obj->uobject.id;
 
+	if (inode) {
+		if (new_xrcd) {
+			/* create new inode/xrcd table entry */
+			ret = xrcd_table_insert(file->device, inode, xrcd);
+			if (ret)
+				goto err_insert_xrcd;
+		}
+		atomic_inc(&xrcd->usecnt);
+	}
+
 	if (copy_to_user((void __user *) (unsigned long) cmd.response,
 			 &resp, sizeof resp)) {
 		ret = -EFAULT;
 		goto err_copy;
 	}
 
+	if (f)
+		fput(f);
+
 	mutex_lock(&file->mutex);
 	list_add_tail(&obj->uobject.list, &file->ucontext->xrcd_list);
 	mutex_unlock(&file->mutex);
@@ -2385,9 +2537,17 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file,
 
 	up_write(&obj->uobject.mutex);
 
+	mutex_unlock(&file->device->xrcd_tree_mutex);
 	return in_len;
 
 err_copy:
+	if (inode) {
+		if (new_xrcd)
+			xrcd_table_delete(file->device, inode);
+		atomic_dec(&xrcd->usecnt);
+	}
+
+err_insert_xrcd:
 	idr_remove_uobj(&ib_uverbs_xrcd_idr, &obj->uobject);
 
 err_idr:
@@ -2395,33 +2555,66 @@ err_idr:
 
 err:
 	put_uobj_write(&obj->uobject);
+
+err_tree_mutex_unlock:
+	if (f)
+		fput(f);
+
+	mutex_unlock(&file->device->xrcd_tree_mutex);
+
 	return ret;
 }
 
 ssize_t ib_uverbs_close_xrcd(struct ib_uverbs_file *file,
-				   const char __user *buf, int in_len,
-				   int out_len)
+			     const char __user *buf, int in_len,
+			     int out_len)
 {
 	struct ib_uverbs_close_xrcd	cmd;
 	struct ib_uobject	       *uobj;
-	int				ret;
+	struct ib_xrcd		       *xrcd = NULL;
+	struct inode		       *inode = NULL;
+	struct ib_uxrcd_object	       *obj;
+	int				live;
+	int				ret = 0;
 
 	if (copy_from_user(&cmd, buf, sizeof cmd))
 		return -EFAULT;
 
+	mutex_lock(&file->device->xrcd_tree_mutex);
 	uobj = idr_write_uobj(&ib_uverbs_xrcd_idr, cmd.xrcd_handle,
 			      file->ucontext);
-	if (!uobj)
-		return -EINVAL;
+	if (!uobj) {
+		ret = -EINVAL;
+		goto out;
+	}
 
-	ret = ib_dealloc_xrcd(uobj->object);
-	if (!ret)
-		uobj->live = 0;
+	xrcd  = uobj->object;
+	inode = xrcd->inode;
+	obj   = container_of(uobj, struct ib_uxrcd_object, uobject);
+	if (atomic_read(&obj->refcnt)) {
+		ret = -EBUSY;
+		put_uobj_write(uobj);
+		goto out;
+	}
+
+	if (!inode || atomic_dec_and_test(&xrcd->usecnt)) {
+		ret = ib_dealloc_xrcd(uobj->object);
+		if (!ret)
+			uobj->live = 0;
+	}
+
+	live = uobj->live;
+
+	if (inode && ret)
+		atomic_inc(&xrcd->usecnt);
 
 	put_uobj_write(uobj);
 
 	if (ret)
-		return ret;
+		goto out;
+
+	if (inode && !live)
+		xrcd_table_delete(file->device, inode);
 
 	idr_remove_uobj(&ib_uverbs_xrcd_idr, uobj);
 
@@ -2431,5 +2624,24 @@ ssize_t ib_uverbs_close_xrcd(struct ib_uverbs_file *file,
 
 	put_uobj(uobj);
 
-	return in_len;
+	ret = in_len;
+
+out:
+	mutex_unlock(&file->device->xrcd_tree_mutex);
+	return ret;
+}
+
+void ib_uverbs_dealloc_xrcd(struct ib_uverbs_device *dev,
+			    struct ib_xrcd *xrcd)
+{
+	struct inode *inode;
+
+	inode = xrcd->inode;
+	if (inode && !atomic_dec_and_test(&xrcd->usecnt))
+		return;
+
+	ib_dealloc_xrcd(xrcd);
+
+	if (inode)
+		xrcd_table_delete(dev, inode);
 }
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index 2a97810..2b9d744 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -250,15 +250,17 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
 		kfree(uobj);
 	}
 
+	mutex_lock(&file->device->xrcd_tree_mutex);
 	list_for_each_entry_safe(uobj, tmp, &context->xrcd_list, list) {
 		struct ib_xrcd *xrcd = uobj->object;
 		struct ib_uxrcd_object *uxrcd =
 			container_of(uobj, struct ib_uxrcd_object, uobject);
 
 		idr_remove_uobj(&ib_uverbs_xrcd_idr, uobj);
-		ib_dealloc_xrcd(xrcd);
+		ib_uverbs_dealloc_xrcd(file->device, xrcd);
 		kfree(uxrcd);
 	}
+	mutex_unlock(&file->device->xrcd_tree_mutex);
 
 	list_for_each_entry_safe(uobj, tmp, &context->pd_list, list) {
 		struct ib_pd *pd = uobj->object;
@@ -763,6 +765,8 @@ static void ib_uverbs_add_one(struct ib_device *device)
 
 	kref_init(&uverbs_dev->ref);
 	init_completion(&uverbs_dev->comp);
+	uverbs_dev->xrcd_tree = RB_ROOT;
+	mutex_init(&uverbs_dev->xrcd_tree_mutex);
 
 	spin_lock(&map_lock);
 	uverbs_dev->devnum = find_first_zero_bit(dev_map, IB_UVERBS_MAX_DEVICES);
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index b75193c..99f76b6 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -1014,7 +1014,7 @@ struct ib_xrcd *ib_alloc_xrcd(struct ib_device *device)
 	xrcd = device->alloc_xrcd(device, NULL, NULL);
 	if (!IS_ERR(xrcd)) {
 		xrcd->device  = device;
-		xrcd->uobject = NULL;
+		xrcd->inode   = NULL;
 		atomic_set(&xrcd->usecnt, 0);
 	}
 
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 1d843c3..322d145 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -848,7 +848,7 @@ struct ib_pd {
 
 struct ib_xrcd {
 	struct ib_device       *device;
-	struct ib_uobject      *uobject;
+	struct inode	       *inode;
 	atomic_t		usecnt; /* count all resources */
 };
 
-- 
1.6.6.1

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* Re: [PATCH 4/4] IB/uverbs: Support for associating XRC domains to inodes
       [not found]             ` <adaeil2gfy9.fsf-BjVyx320WGW9gfZ95n9DRSW4+XlvGpQz@public.gmane.org>
@ 2010-02-04  9:29               ` Jack Morgenstein
  0 siblings, 0 replies; 15+ messages in thread
From: Jack Morgenstein @ 2010-02-04  9:29 UTC (permalink / raw)
  To: Roland Dreier
  Cc: Roland Dreier, linux-rdma-u79uwXL29TY76Z2rM5mHXA, Tziporet Koren

On Wednesday 03 February 2010 23:35, Roland Dreier wrote:
>  > 2. In closing/deallocating xrc domains, it is not an error if ib_dealloc_xrcd returns "busy".
>  >    This simply means that there are other user processes still using that xrc domain.
>  >    The current process still needs to clean up its uobject.  In particular, the usecnt should
>  >    not be re-incremented on the busy return. (If it gets re-incremented on busy, no one can ever
>  >    succeed in closing an xrc domain if more than one process is using it).
> 
> I do agree we shouldn't reincrement the use count if the dealloc
> fails -- in ib_uverbs_dealloc_xrcd we're in the path of closing a
> context and so there's no real way to recover if the ib_dealloc_xrcd
> fails.

Agreed.  If we return an error when there is no i-node, then we should also return an error when
an i-node exists (and restore the status-quo-ante by reincrementing the refcnt in this case).
Although, this will leave things in a weird state (with an xrcd which cannot get deallocated).
Like you, I don't really see other options here.

> But I don't think the busy case is an issue -- we should return 
> without trying to dealloc if the xrcd is held by another process, since
> the atomic_dec_and_test will return a non-zero value in that case I
> think.
You are correct -- I missed that in my review.

> So the simplification in the patch below should be OK, right? 
Looks good!

-Jack
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2010-02-04  9:29 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-01-25 19:01 [PATCH 0/4] Current XRC queue Roland Dreier
     [not found] ` <1264446094-4460-1-git-send-email-rolandd-FYB4Gu1CFyUAvxtiuMwx3w@public.gmane.org>
2010-01-25 19:01   ` [PATCH 1/4] IB/core: XRC base implementation Roland Dreier
2010-01-25 19:01   ` [PATCH 2/4] IB/uverbs: Support for XRC Roland Dreier
     [not found]     ` <1264446094-4460-3-git-send-email-rolandd-FYB4Gu1CFyUAvxtiuMwx3w@public.gmane.org>
2010-01-26 14:39       ` Or Gerlitz
     [not found]         ` <4B5EFEAD.9040702-hKgKHo2Ms0FWk0Htik3J/w@public.gmane.org>
2010-01-26 23:51           ` Roland Dreier
     [not found]             ` <aday6jko26n.fsf-BjVyx320WGW9gfZ95n9DRSW4+XlvGpQz@public.gmane.org>
2010-01-27  9:13               ` Or Gerlitz
     [not found]                 ` <4B6003B2.4040101-smomgflXvOZWk0Htik3J/w@public.gmane.org>
2010-01-27 10:55                   ` Jack Morgenstein
2010-01-27 16:00                   ` Roland Dreier
     [not found]                     ` <ada8wbjo7v3.fsf-BjVyx320WGW9gfZ95n9DRSW4+XlvGpQz@public.gmane.org>
2010-01-28  7:53                       ` Or Gerlitz
2010-01-25 19:01   ` [PATCH 3/4] IB/uverbs: Add struct ib_usrq_object and ib_uxrcd_object Roland Dreier
2010-01-25 19:01   ` [PATCH 4/4] IB/uverbs: Support for associating XRC domains to inodes Roland Dreier
     [not found]     ` <1264446094-4460-5-git-send-email-rolandd-FYB4Gu1CFyUAvxtiuMwx3w@public.gmane.org>
2010-02-01 13:52       ` Jack Morgenstein
     [not found]         ` <201002011552.24904.jackm-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>
2010-02-03 21:35           ` Roland Dreier
     [not found]             ` <adaeil2gfy9.fsf-BjVyx320WGW9gfZ95n9DRSW4+XlvGpQz@public.gmane.org>
2010-02-04  9:29               ` Jack Morgenstein
2010-01-26 11:16   ` [PATCH 0/4] Current XRC queue Tziporet Koren

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.