linux-rdma.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Krishnamraju Eraparaju <krishna2@chelsio.com>
To: Sagi Grimberg <sagi@grimberg.me>,
	Steve Wise <larrystevenwise@gmail.com>,
	Bernard Metzler <BMT@zurich.ibm.com>
Cc: "linux-rdma@vger.kernel.org" <linux-rdma@vger.kernel.org>,
	Jason Gunthorpe <jgg@ziepe.ca>
Subject: Re: [PATCH v3] iwcm: don't hold the irq disabled lock on iw_rem_ref
Date: Wed, 11 Sep 2019 00:51:59 +0530	[thread overview]
Message-ID: <20190910192157.GA5954@chelsio.com> (raw)
In-Reply-To: <5cc42f23-bf60-ca8d-f40c-cbd8875f5756@grimberg.me>

Please review the below patch, I will resubmit this in patch-series
after review.
- As kput_ref handler(siw_free_qp) uses vfree, iwcm can't call
  iw_rem_ref() with spinlocks held. Doing so can cause vfree() to sleep
  with irq disabled.
  Two possible solutions:
  1)With spinlock acquired, take a copy of "cm_id_priv->qp" and update
    it to NULL. And after releasing lock use the copied qp pointer for
    rem_ref().
  2)Replacing issue causing vmalloc()/vfree to kmalloc()/kfree in SIW
    driver, may not be a ideal solution.
  
  Solution 2 may not be ideal as allocating huge contigous memory for
   SQ & RQ doesn't look appropriate.
  
- The structure "siw_base_qp" is getting freed in siw_destroy_qp(), but
  if cm_close_handler() holds the last reference, then siw_free_qp(),
  via cm_close_handler(), tries to get already freed "siw_base_qp" from
  "ib_qp". 
   Hence, "siw_base_qp" should be freed at the end of siw_free_qp().


diff --git a/drivers/infiniband/core/iwcm.c
b/drivers/infiniband/core/iwcm.c
index 72141c5b7c95..d5ab69fa598a 100644
--- a/drivers/infiniband/core/iwcm.c
+++ b/drivers/infiniband/core/iwcm.c
@@ -373,6 +373,7 @@ static void destroy_cm_id(struct iw_cm_id *cm_id)
 {
        struct iwcm_id_private *cm_id_priv;
        unsigned long flags;
+       struct ib_qp *qp;

        cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
        /*
@@ -389,6 +390,9 @@ static void destroy_cm_id(struct iw_cm_id *cm_id)
        set_bit(IWCM_F_DROP_EVENTS, &cm_id_priv->flags);

        spin_lock_irqsave(&cm_id_priv->lock, flags);
+       qp = cm_id_priv->qp;
+       cm_id_priv->qp = NULL;
+
        switch (cm_id_priv->state) {
        case IW_CM_STATE_LISTEN:
                cm_id_priv->state = IW_CM_STATE_DESTROYING;
@@ -401,7 +405,7 @@ static void destroy_cm_id(struct iw_cm_id *cm_id)
                cm_id_priv->state = IW_CM_STATE_DESTROYING;
                spin_unlock_irqrestore(&cm_id_priv->lock, flags);
                /* Abrupt close of the connection */
-               (void)iwcm_modify_qp_err(cm_id_priv->qp);
+               (void)iwcm_modify_qp_err(qp);
                spin_lock_irqsave(&cm_id_priv->lock, flags);
                break;
        case IW_CM_STATE_IDLE:
@@ -426,11 +430,9 @@ static void destroy_cm_id(struct iw_cm_id *cm_id)
                BUG();
                break;
        }
-       if (cm_id_priv->qp) {
-               cm_id_priv->id.device->ops.iw_rem_ref(cm_id_priv->qp);
-               cm_id_priv->qp = NULL;
-       }
        spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+       if (qp)
+               cm_id_priv->id.device->ops.iw_rem_ref(qp);

        if (cm_id->mapped) {
                iwpm_remove_mapinfo(&cm_id->local_addr,
&cm_id->m_local_addr);
@@ -671,11 +673,11 @@ int iw_cm_accept(struct iw_cm_id *cm_id,
                BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_RECV);
                cm_id_priv->state = IW_CM_STATE_IDLE;
                spin_lock_irqsave(&cm_id_priv->lock, flags);
-               if (cm_id_priv->qp) {
-                       cm_id->device->ops.iw_rem_ref(qp);
-                       cm_id_priv->qp = NULL;
-               }
+               qp = cm_id_priv->qp;
+               cm_id_priv->qp = NULL;
                spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+               if (qp)
+                       cm_id->device->ops.iw_rem_ref(qp);
                clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
                wake_up_all(&cm_id_priv->connect_wait);
        }
@@ -730,13 +732,13 @@ int iw_cm_connect(struct iw_cm_id *cm_id, struct
iw_cm_conn_param *iw_param)
                return 0;       /* success */

        spin_lock_irqsave(&cm_id_priv->lock, flags);
-       if (cm_id_priv->qp) {
-               cm_id->device->ops.iw_rem_ref(qp);
-               cm_id_priv->qp = NULL;
-       }
+       qp = cm_id_priv->qp;
+       cm_id_priv->qp = NULL;
        cm_id_priv->state = IW_CM_STATE_IDLE;
 err:
        spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+       if (qp)
+               cm_id->device->ops.iw_rem_ref(qp);
        clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
        wake_up_all(&cm_id_priv->connect_wait);
        return ret;
@@ -880,6 +882,7 @@ static int cm_conn_rep_handler(struct
iwcm_id_private *cm_id_priv,
 {
        unsigned long flags;
        int ret;
+       struct ib_qp *qp = NULL;

        spin_lock_irqsave(&cm_id_priv->lock, flags);
        /*
@@ -896,11 +899,13 @@ static int cm_conn_rep_handler(struct
iwcm_id_private *cm_id_priv,
                cm_id_priv->state = IW_CM_STATE_ESTABLISHED;
        } else {
                /* REJECTED or RESET */
-               cm_id_priv->id.device->ops.iw_rem_ref(cm_id_priv->qp);
+               qp = cm_id_priv->qp;
                cm_id_priv->qp = NULL;
                cm_id_priv->state = IW_CM_STATE_IDLE;
        }
        spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+       if (qp)
+               cm_id_priv->id.device->ops.iw_rem_ref(qp);
        ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event);

        if (iw_event->private_data_len)
@@ -944,12 +949,12 @@ static int cm_close_handler(struct iwcm_id_private
*cm_id_priv,
 {
        unsigned long flags;
        int ret = 0;
+       struct ib_qp *qp;
+
        spin_lock_irqsave(&cm_id_priv->lock, flags);
+       qp = cm_id_priv->qp;
+       cm_id_priv->qp = NULL;

-       if (cm_id_priv->qp) {
-               cm_id_priv->id.device->ops.iw_rem_ref(cm_id_priv->qp);
-               cm_id_priv->qp = NULL;
-       }
        switch (cm_id_priv->state) {
        case IW_CM_STATE_ESTABLISHED:
        case IW_CM_STATE_CLOSING:
@@ -965,6 +970,8 @@ static int cm_close_handler(struct iwcm_id_private
*cm_id_priv,
        }
        spin_unlock_irqrestore(&cm_id_priv->lock, flags);

+       if (qp)
+               cm_id_priv->id.device->ops.iw_rem_ref(qp);
        return ret;
 }

diff --git a/drivers/infiniband/sw/siw/siw_qp.c
b/drivers/infiniband/sw/siw/siw_qp.c
index 430314c8abd9..cb177688a49f 100644
--- a/drivers/infiniband/sw/siw/siw_qp.c
+++ b/drivers/infiniband/sw/siw/siw_qp.c
@@ -1307,6 +1307,7 @@ void siw_free_qp(struct kref *ref)
        struct siw_qp *found, *qp = container_of(ref, struct siw_qp,
ref);
        struct siw_device *sdev = qp->sdev;
        unsigned long flags;
+       struct siw_base_qp *siw_base_qp = to_siw_base_qp(qp->ib_qp);

        if (qp->cep)
                siw_cep_put(qp->cep);
@@ -1327,4 +1328,5 @@ void siw_free_qp(struct kref *ref)
        atomic_dec(&sdev->num_qp);
        siw_dbg_qp(qp, "free QP\n");
        kfree_rcu(qp, rcu);
+       kfree(siw_base_qp);
 }
diff --git a/drivers/infiniband/sw/siw/siw_verbs.c
b/drivers/infiniband/sw/siw/siw_verbs.c
index da52c90e06d4..ac08d84d84cb 100644
--- a/drivers/infiniband/sw/siw/siw_verbs.c
+++ b/drivers/infiniband/sw/siw/siw_verbs.c
@@ -603,7 +603,6 @@ int siw_verbs_modify_qp(struct ib_qp *base_qp,
struct ib_qp_attr *attr,
 int siw_destroy_qp(struct ib_qp *base_qp, struct ib_udata *udata)
 {
        struct siw_qp *qp = to_siw_qp(base_qp);
-       struct siw_base_qp *siw_base_qp = to_siw_base_qp(base_qp);
        struct siw_ucontext *uctx =
                rdma_udata_to_drv_context(udata, struct siw_ucontext,
                                          base_ucontext);
@@ -640,7 +639,6 @@ int siw_destroy_qp(struct ib_qp *base_qp, struct
ib_udata *udata)
        qp->scq = qp->rcq = NULL;

        siw_qp_put(qp);
-       kfree(siw_base_qp);

        return 0;
 }


On Tuesday, September 09/10/19, 2019 at 22:23:13 +0530, Sagi Grimberg wrote:
> 
> >> This may be the final put on a qp and result in freeing
> >> resourcesand should not be done with interrupts disabled.
> > 
> > Hi Sagi,
> > 
> > Few things to consider in fixing this completely:
> >    - there are some other places where iw_rem_ref() should be called
> >      after spinlock critical section. eg: in cm_close_handler(),
> > iw_cm_connect(),...
> >    - Any modifications to "cm_id_priv" should be done with in spinlock
> >      critical section, modifying cm_id_priv->qp outside spinlocks, even
> > with atomic xchg(), might be error prone.
> >    - the structure "siw_base_qp" is getting freed in siw_destroy_qp(),
> >      but it should be done at the end of siw_free_qp().
> 
> Not sure why you say that, at the end of this function ->qp will be null
> anyways...
 Hope the above description and patch answers this.
> 
> >    
> > I am about to finish writing a patch that cover all the above issues.
> > Will test it and submit here by EOD.
> 
> Sure, you take it. Just stumbled on it so thought I'd go ahead and send
> a patch...

  reply	other threads:[~2019-09-10 19:22 UTC|newest]

Thread overview: 15+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-09-04 21:25 [PATCH v3] iwcm: don't hold the irq disabled lock on iw_rem_ref Sagi Grimberg
2019-09-10 11:18 ` Krishnamraju Eraparaju
2019-09-10 16:53   ` Sagi Grimberg
2019-09-10 19:21     ` Krishnamraju Eraparaju [this message]
2019-09-11  9:38     ` Bernard Metzler
2019-09-11 14:42       ` Steve Wise
2019-09-11 15:58         ` Krishnamraju Eraparaju
2019-09-16 16:28           ` Jason Gunthorpe
2019-09-17  9:04           ` Bernard Metzler
2019-09-17 12:47             ` Krishnamraju Eraparaju
2019-09-17 13:40             ` Bernard Metzler
2019-09-17 17:20             ` Sagi Grimberg
2019-09-18 13:43               ` Krishnamraju Eraparaju
2019-09-21 18:35                 ` Krishnamraju Eraparaju
2019-10-01 17:17                   ` Krishnamraju Eraparaju

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190910192157.GA5954@chelsio.com \
    --to=krishna2@chelsio.com \
    --cc=BMT@zurich.ibm.com \
    --cc=jgg@ziepe.ca \
    --cc=larrystevenwise@gmail.com \
    --cc=linux-rdma@vger.kernel.org \
    --cc=sagi@grimberg.me \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).