All of lore.kernel.org
 help / color / mirror / Atom feed
From: Bob Pearson <rpearsonhpe@gmail.com>
To: Zhu Yanjun <zyjzyj2000@gmail.com>
Cc: Jason Gunthorpe <jgg@nvidia.com>,
	RDMA mailing list <linux-rdma@vger.kernel.org>,
	Bob Pearson <rpearson@hpe.com>
Subject: Re: [PATCH for-next v2 7/9] RDMA/rxe: Add support for bind MW work requests
Date: Tue, 20 Apr 2021 23:24:43 -0500	[thread overview]
Message-ID: <7e799159-f39d-755d-cf5b-a58367e54d19@gmail.com> (raw)
In-Reply-To: <CAD=hENf8SA2==WhtufkcesuJH7cMys8vPNj4_1a=OAaE2GREOA@mail.gmail.com>

On 4/20/21 9:32 AM, Zhu Yanjun wrote:
> On Thu, Apr 15, 2021 at 10:55 AM Bob Pearson <rpearsonhpe@gmail.com> wrote:
>>
>> Add support for bind MW work requests from user space.
>> Since rdma/core does not support bind mw in ib_send_wr
>> there is no way to support bind mw in kernel space.
>>
>> Added bind_mw local operation in rxe_req.c
>> Added bind_mw WR operation in rxe_opcode.c
>> Added bind_mw WC in rxe_comp.c
>> Added additional fields to rxe_mw in rxe_verbs.h
>> Added do_dealloc_mw() subroutine to cleanup an mw
>> when rxe_dealloc_mw is called.
>> Added code to implement bind_mw operation in rxe_mw.c
>>
>> Signed-off-by: Bob Pearson <rpearson@hpe.com>
>> ---
>> v2:
>>   Dropped kernel support for bind_mw in rxe_mw.c
>>   Replaced umw with mw in rxe_send_wr.
>>
>>  drivers/infiniband/sw/rxe/rxe_comp.c   |   1 +
>>  drivers/infiniband/sw/rxe/rxe_loc.h    |   1 +
>>  drivers/infiniband/sw/rxe/rxe_mw.c     | 204 ++++++++++++++++++++++++-
>>  drivers/infiniband/sw/rxe/rxe_opcode.c |   7 +
>>  drivers/infiniband/sw/rxe/rxe_req.c    |   9 ++
>>  drivers/infiniband/sw/rxe/rxe_verbs.h  |  15 +-
>>  6 files changed, 232 insertions(+), 5 deletions(-)
>>
>> diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c
>> index 2af26737d32d..bc5488af5f55 100644
>> --- a/drivers/infiniband/sw/rxe/rxe_comp.c
>> +++ b/drivers/infiniband/sw/rxe/rxe_comp.c
>> @@ -103,6 +103,7 @@ static enum ib_wc_opcode wr_to_wc_opcode(enum ib_wr_opcode opcode)
>>         case IB_WR_RDMA_READ_WITH_INV:          return IB_WC_RDMA_READ;
>>         case IB_WR_LOCAL_INV:                   return IB_WC_LOCAL_INV;
>>         case IB_WR_REG_MR:                      return IB_WC_REG_MR;
>> +       case IB_WR_BIND_MW:                     return IB_WC_BIND_MW;
>>
>>         default:
>>                 return 0xff;
>> diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h
>> index edf575930a98..e6f574973298 100644
>> --- a/drivers/infiniband/sw/rxe/rxe_loc.h
>> +++ b/drivers/infiniband/sw/rxe/rxe_loc.h
>> @@ -110,6 +110,7 @@ int advance_dma_data(struct rxe_dma_info *dma, unsigned int length);
>>  /* rxe_mw.c */
>>  int rxe_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata);
>>  int rxe_dealloc_mw(struct ib_mw *ibmw);
>> +int rxe_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe);
>>  void rxe_mw_cleanup(struct rxe_pool_entry *arg);
>>
>>  /* rxe_net.c */
>> diff --git a/drivers/infiniband/sw/rxe/rxe_mw.c b/drivers/infiniband/sw/rxe/rxe_mw.c
>> index 69128e298d44..6ced54126b72 100644
>> --- a/drivers/infiniband/sw/rxe/rxe_mw.c
>> +++ b/drivers/infiniband/sw/rxe/rxe_mw.c
>> @@ -29,6 +29,29 @@ int rxe_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata)
>>         return 0;
>>  }
>>
>> +static void do_dealloc_mw(struct rxe_mw *mw)
>> +{
>> +       if (mw->mr) {
>> +               struct rxe_mr *mr = mw->mr;
>> +
>> +               mw->mr = NULL;
>> +               atomic_dec(&mr->num_mw);
>> +               rxe_drop_ref(mr);
>> +       }
>> +
>> +       if (mw->qp) {
>> +               struct rxe_qp *qp = mw->qp;
>> +
>> +               mw->qp = NULL;
>> +               rxe_drop_ref(qp);
>> +       }
>> +
>> +       mw->access = 0;
>> +       mw->addr = 0;
>> +       mw->length = 0;
>> +       mw->state = RXE_MW_STATE_INVALID;
>> +}
>> +
>>  int rxe_dealloc_mw(struct ib_mw *ibmw)
>>  {
>>         struct rxe_mw *mw = to_rmw(ibmw);
>> @@ -36,7 +59,7 @@ int rxe_dealloc_mw(struct ib_mw *ibmw)
>>         unsigned long flags;
>>
>>         spin_lock_irqsave(&mw->lock, flags);
>> -       mw->state = RXE_MW_STATE_INVALID;
>> +       do_dealloc_mw(mw);
>>         spin_unlock_irqrestore(&mw->lock, flags);
>>
>>         rxe_drop_ref(mw);
>> @@ -45,6 +68,185 @@ int rxe_dealloc_mw(struct ib_mw *ibmw)
>>         return 0;
>>  }
>>
>> +static int check_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
>> +                        struct rxe_mw *mw, struct rxe_mr *mr)
>> +{
>> +       if (mw->ibmw.type == IB_MW_TYPE_1) {
>> +               if (unlikely(mw->state != RXE_MW_STATE_VALID)) {
>> +                       pr_err_once(
>> +                               "attempt to bind a type 1 MW not in the valid state\n");
>> +                       return -EINVAL;
>> +               }
>> +
>> +               /* o10-36.2.2 */
>> +               if (unlikely((mw->access & IB_ZERO_BASED))) {
>> +                       pr_err_once("attempt to bind a zero based type 1 MW\n");
>> +                       return -EINVAL;
>> +               }
>> +       }
>> +
>> +       if (mw->ibmw.type == IB_MW_TYPE_2) {
>> +               /* o10-37.2.30 */
>> +               if (unlikely(mw->state != RXE_MW_STATE_FREE)) {
>> +                       pr_err_once(
>> +                               "attempt to bind a type 2 MW not in the free state\n");
>> +                       return -EINVAL;
>> +               }
>> +
>> +               /* C10-72 */
>> +               if (unlikely(qp->pd != to_rpd(mw->ibmw.pd))) {
>> +                       pr_err_once(
>> +                               "attempt to bind type 2 MW with qp with different PD\n");
>> +                       return -EINVAL;
>> +               }
>> +
>> +               /* o10-37.2.40 */
>> +               if (unlikely(!mr || wqe->wr.wr.mw.length == 0)) {
>> +                       pr_err_once(
>> +                               "attempt to invalidate type 2 MW by binding with NULL or zero length MR\n");
>> +                       return -EINVAL;
>> +               }
>> +       }
>> +
>> +       if (unlikely((wqe->wr.wr.mw.rkey & 0xff) == (mw->ibmw.rkey & 0xff))) {
>> +               pr_err_once("attempt to bind MW with same key\n");
>> +               return -EINVAL;
>> +       }
>> +
>> +       /* remaining checks only apply to a nonzero MR */
>> +       if (!mr)
>> +               return 0;
>> +
>> +       if (unlikely(mr->access & IB_ZERO_BASED)) {
>> +               pr_err_once("attempt to bind MW to zero based MR\n");
>> +               return -EINVAL;
>> +       }
>> +
>> +       /* C10-73 */
>> +       if (unlikely(!(mr->access & IB_ACCESS_MW_BIND))) {
>> +               pr_err_once(
>> +                       "attempt to bind an MW to an MR without bind access\n");
>> +               return -EINVAL;
>> +       }
>> +
>> +       /* C10-74 */
>> +       if (unlikely((mw->access &
>> +                     (IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_ATOMIC)) &&
>> +                    !(mr->access & IB_ACCESS_LOCAL_WRITE))) {
>> +               pr_err_once(
>> +                       "attempt to bind a writeable MW to an MR without local write access\n");
>> +               return -EINVAL;
>> +       }
>> +
>> +       /* C10-75 */
>> +       if (mw->access & IB_ZERO_BASED) {
>> +               if (unlikely(wqe->wr.wr.mw.length > mr->length)) {
>> +                       pr_err_once(
>> +                               "attempt to bind a ZB MW outside of the MR\n");
>> +                       return -EINVAL;
>> +               }
>> +       } else {
>> +               if (unlikely((wqe->wr.wr.mw.addr < mr->iova) ||
>> +                            ((wqe->wr.wr.mw.addr + wqe->wr.wr.mw.length) >
>> +                             (mr->iova + mr->length)))) {
>> +                       pr_err_once(
>> +                               "attempt to bind a VA MW outside of the MR\n");
>> +                       return -EINVAL;
>> +               }
>> +       }
>> +
>> +       return 0;
>> +}
>> +
>> +static int do_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
>> +                     struct rxe_mw *mw, struct rxe_mr *mr)
>> +{
>> +       u32 rkey;
>> +       u32 new_rkey;
>> +
>> +       rkey = mw->ibmw.rkey;
>> +       new_rkey = (rkey & 0xffffff00) | (wqe->wr.wr.mw.rkey & 0x000000ff);
>> +
>> +       mw->ibmw.rkey = new_rkey;
>> +       mw->access = wqe->wr.wr.mw.access;
>> +       mw->state = RXE_MW_STATE_VALID;
>> +       mw->addr = wqe->wr.wr.mw.addr;
>> +       mw->length = wqe->wr.wr.mw.length;
>> +
>> +       if (mw->mr) {
>> +               rxe_drop_ref(mw->mr);
>> +               atomic_dec(&mw->mr->num_mw);
>> +               mw->mr = NULL;
>> +       }
>> +
>> +       if (mw->length) {
>> +               mw->mr = mr;
>> +               atomic_inc(&mr->num_mw);
>> +               rxe_add_ref(mr);
>> +       }
>> +
>> +       if (mw->ibmw.type == IB_MW_TYPE_2) {
>> +               rxe_add_ref(qp);
>> +               mw->qp = qp;
>> +       }
>> +
>> +       return 0;
> 
> Is it necessary to return 0?
> 
> There is no other return value — the function always returns 0.
> 
> Zhu Yanjun

Good catch. It was left over from the past. I'll get rid of it.

Bob

  reply	other threads:[~2021-04-21  4:25 UTC|newest]

Thread overview: 17+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-04-15  2:54 [PATCH for-next v2 0/9] RDMA/rxe: Implement memory windows Bob Pearson
2021-04-15  2:54 ` [PATCH for-next v2 1/9] RDMA/rxe: Add bind MW fields to rxe_send_wr Bob Pearson
2021-04-15  2:54 ` [PATCH for-next v2 2/9] RDMA/rxe: Return errors for add index and key Bob Pearson
2021-04-15  2:54 ` [PATCH for-next v2 3/9] RDMA/rxe: Enable MW object pool Bob Pearson
2021-04-15  2:54 ` [PATCH for-next v2 4/9] RDMA/rxe: Add ib_alloc_mw and ib_dealloc_mw verbs Bob Pearson
2021-04-15  2:54 ` [PATCH for-next v2 5/9] RDMA/rxe: Replace WR_REG_MASK by WR_LOCAL_OP_MASK Bob Pearson
2021-04-15  2:54 ` [PATCH for-next v2 6/9] RDMA/rxe: Move local ops to subroutine Bob Pearson
2021-04-15  2:54 ` [PATCH for-next v2 7/9] RDMA/rxe: Add support for bind MW work requests Bob Pearson
2021-04-20 14:32   ` Zhu Yanjun
2021-04-21  4:24     ` Bob Pearson [this message]
2021-04-15  2:54 ` [PATCH for-next v2 8/9] RDMA/rxe: Implement invalidate MW operations Bob Pearson
2021-04-20  6:38   ` Zhu Yanjun
2021-04-21  4:22     ` Bob Pearson
2021-04-15  2:54 ` [PATCH for-next v2 9/9] RDMA/rxe: Implement memory access through MWs Bob Pearson
2021-04-20  6:34   ` Zhu Yanjun
2021-04-20 12:04     ` Jason Gunthorpe
2021-04-21  4:09       ` Bob Pearson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=7e799159-f39d-755d-cf5b-a58367e54d19@gmail.com \
    --to=rpearsonhpe@gmail.com \
    --cc=jgg@nvidia.com \
    --cc=linux-rdma@vger.kernel.org \
    --cc=rpearson@hpe.com \
    --cc=zyjzyj2000@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.