All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] libfc: replace 'rp_mutex' with 'rp_lock'
@ 2016-04-25  8:01 Hannes Reinecke
  2016-04-25 20:26 ` Ewan D. Milne
                   ` (2 more replies)
  0 siblings, 3 replies; 10+ messages in thread
From: Hannes Reinecke @ 2016-04-25  8:01 UTC (permalink / raw)
  To: Martin K. Petersen
  Cc: Christoph Hellwig, Ewan Milne, James Bottomley, linux-scsi,
	Hannes Reinecke

We cannot use an embedded mutex in a structure with reference
counting, as mutex unlock might be delayed, and the waiters
might then access an already freed memory area.
So convert it to a spinlock.

For details cf https://lkml.org/lkml/2015/2/11/245

Signed-off-by: Hannes Reinecke <hare@suse.de>
---
 drivers/scsi/libfc/fc_rport.c | 90 +++++++++++++++++++++----------------------
 include/scsi/libfc.h          |  4 +-
 2 files changed, 47 insertions(+), 47 deletions(-)

diff --git a/drivers/scsi/libfc/fc_rport.c b/drivers/scsi/libfc/fc_rport.c
index 589ff9a..4520b5a 100644
--- a/drivers/scsi/libfc/fc_rport.c
+++ b/drivers/scsi/libfc/fc_rport.c
@@ -136,7 +136,7 @@ static struct fc_rport_priv *fc_rport_create(struct fc_lport *lport,
 	rdata->ids.roles = FC_RPORT_ROLE_UNKNOWN;
 
 	kref_init(&rdata->kref);
-	mutex_init(&rdata->rp_mutex);
+	spin_lock_init(&rdata->rp_lock);
 	rdata->local_port = lport;
 	rdata->rp_state = RPORT_ST_INIT;
 	rdata->event = RPORT_EV_NONE;
@@ -251,7 +251,7 @@ static void fc_rport_work(struct work_struct *work)
 	struct fc4_prov *prov;
 	u8 type;
 
-	mutex_lock(&rdata->rp_mutex);
+	spin_lock(&rdata->rp_lock);
 	event = rdata->event;
 	rport_ops = rdata->ops;
 	rport = rdata->rport;
@@ -264,7 +264,7 @@ static void fc_rport_work(struct work_struct *work)
 		rdata->event = RPORT_EV_NONE;
 		rdata->major_retries = 0;
 		kref_get(&rdata->kref);
-		mutex_unlock(&rdata->rp_mutex);
+		spin_unlock(&rdata->rp_lock);
 
 		if (!rport)
 			rport = fc_remote_port_add(lport->host, 0, &ids);
@@ -274,7 +274,7 @@ static void fc_rport_work(struct work_struct *work)
 			kref_put(&rdata->kref, lport->tt.rport_destroy);
 			return;
 		}
-		mutex_lock(&rdata->rp_mutex);
+		spin_lock(&rdata->rp_lock);
 		if (rdata->rport)
 			FC_RPORT_DBG(rdata, "rport already allocated\n");
 		rdata->rport = rport;
@@ -287,7 +287,7 @@ static void fc_rport_work(struct work_struct *work)
 		rpriv->flags = rdata->flags;
 		rpriv->e_d_tov = rdata->e_d_tov;
 		rpriv->r_a_tov = rdata->r_a_tov;
-		mutex_unlock(&rdata->rp_mutex);
+		spin_unlock(&rdata->rp_lock);
 
 		if (rport_ops && rport_ops->event_callback) {
 			FC_RPORT_DBG(rdata, "callback ev %d\n", event);
@@ -313,7 +313,7 @@ static void fc_rport_work(struct work_struct *work)
 			mutex_unlock(&fc_prov_mutex);
 		}
 		port_id = rdata->ids.port_id;
-		mutex_unlock(&rdata->rp_mutex);
+		spin_unlock(&rdata->rp_lock);
 
 		if (rport_ops && rport_ops->event_callback) {
 			FC_RPORT_DBG(rdata, "callback ev %d\n", event);
@@ -334,18 +334,18 @@ static void fc_rport_work(struct work_struct *work)
 		if (rport) {
 			rpriv = rport->dd_data;
 			rpriv->rp_state = RPORT_ST_DELETE;
-			mutex_lock(&rdata->rp_mutex);
+			spin_lock(&rdata->rp_lock);
 			rdata->rport = NULL;
-			mutex_unlock(&rdata->rp_mutex);
+			spin_unlock(&rdata->rp_lock);
 			fc_remote_port_delete(rport);
 		}
 
 		mutex_lock(&lport->disc.disc_mutex);
-		mutex_lock(&rdata->rp_mutex);
+		spin_lock(&rdata->rp_lock);
 		if (rdata->rp_state == RPORT_ST_DELETE) {
 			if (port_id == FC_FID_DIR_SERV) {
 				rdata->event = RPORT_EV_NONE;
-				mutex_unlock(&rdata->rp_mutex);
+				spin_unlock(&rdata->rp_lock);
 				kref_put(&rdata->kref, lport->tt.rport_destroy);
 			} else if ((rdata->flags & FC_RP_STARTED) &&
 				   rdata->major_retries <
@@ -354,11 +354,11 @@ static void fc_rport_work(struct work_struct *work)
 				rdata->event = RPORT_EV_NONE;
 				FC_RPORT_DBG(rdata, "work restart\n");
 				fc_rport_enter_flogi(rdata);
-				mutex_unlock(&rdata->rp_mutex);
+				spin_unlock(&rdata->rp_lock);
 			} else {
 				FC_RPORT_DBG(rdata, "work delete\n");
 				list_del_rcu(&rdata->peers);
-				mutex_unlock(&rdata->rp_mutex);
+				spin_unlock(&rdata->rp_lock);
 				kref_put(&rdata->kref, lport->tt.rport_destroy);
 			}
 		} else {
@@ -368,13 +368,13 @@ static void fc_rport_work(struct work_struct *work)
 			rdata->event = RPORT_EV_NONE;
 			if (rdata->rp_state == RPORT_ST_READY)
 				fc_rport_enter_ready(rdata);
-			mutex_unlock(&rdata->rp_mutex);
+			spin_unlock(&rdata->rp_lock);
 		}
 		mutex_unlock(&lport->disc.disc_mutex);
 		break;
 
 	default:
-		mutex_unlock(&rdata->rp_mutex);
+		spin_unlock(&rdata->rp_lock);
 		break;
 	}
 }
@@ -393,7 +393,7 @@ static void fc_rport_work(struct work_struct *work)
  */
 static int fc_rport_login(struct fc_rport_priv *rdata)
 {
-	mutex_lock(&rdata->rp_mutex);
+	spin_lock(&rdata->rp_lock);
 
 	rdata->flags |= FC_RP_STARTED;
 	switch (rdata->rp_state) {
@@ -409,7 +409,7 @@ static int fc_rport_login(struct fc_rport_priv *rdata)
 		fc_rport_enter_flogi(rdata);
 		break;
 	}
-	mutex_unlock(&rdata->rp_mutex);
+	spin_unlock(&rdata->rp_lock);
 
 	return 0;
 }
@@ -453,7 +453,7 @@ static void fc_rport_enter_delete(struct fc_rport_priv *rdata,
  */
 static int fc_rport_logoff(struct fc_rport_priv *rdata)
 {
-	mutex_lock(&rdata->rp_mutex);
+	spin_lock(&rdata->rp_lock);
 
 	FC_RPORT_DBG(rdata, "Remove port\n");
 
@@ -470,7 +470,7 @@ static int fc_rport_logoff(struct fc_rport_priv *rdata)
 	 */
 	fc_rport_enter_delete(rdata, RPORT_EV_STOP);
 out:
-	mutex_unlock(&rdata->rp_mutex);
+	spin_unlock(&rdata->rp_lock);
 	return 0;
 }
 
@@ -505,7 +505,7 @@ static void fc_rport_timeout(struct work_struct *work)
 	struct fc_rport_priv *rdata =
 		container_of(work, struct fc_rport_priv, retry_work.work);
 
-	mutex_lock(&rdata->rp_mutex);
+	spin_lock(&rdata->rp_lock);
 
 	switch (rdata->rp_state) {
 	case RPORT_ST_FLOGI:
@@ -530,7 +530,7 @@ static void fc_rport_timeout(struct work_struct *work)
 		break;
 	}
 
-	mutex_unlock(&rdata->rp_mutex);
+	spin_unlock(&rdata->rp_lock);
 }
 
 /**
@@ -666,7 +666,7 @@ static void fc_rport_flogi_resp(struct fc_seq *sp, struct fc_frame *fp,
 	if (fp == ERR_PTR(-FC_EX_CLOSED))
 		goto put;
 
-	mutex_lock(&rdata->rp_mutex);
+	spin_lock(&rdata->rp_lock);
 
 	if (rdata->rp_state != RPORT_ST_FLOGI) {
 		FC_RPORT_DBG(rdata, "Received a FLOGI response, but in state "
@@ -700,7 +700,7 @@ static void fc_rport_flogi_resp(struct fc_seq *sp, struct fc_frame *fp,
 out:
 	fc_frame_free(fp);
 err:
-	mutex_unlock(&rdata->rp_mutex);
+	spin_unlock(&rdata->rp_lock);
 put:
 	kref_put(&rdata->kref, rdata->local_port->tt.rport_destroy);
 	return;
@@ -783,7 +783,7 @@ static void fc_rport_recv_flogi_req(struct fc_lport *lport,
 		rjt_data.explan = ELS_EXPL_NOT_NEIGHBOR;
 		goto reject;
 	}
-	mutex_lock(&rdata->rp_mutex);
+	spin_lock(&rdata->rp_lock);
 
 	FC_RPORT_DBG(rdata, "Received FLOGI in %s state\n",
 		     fc_rport_state(rdata));
@@ -805,7 +805,7 @@ static void fc_rport_recv_flogi_req(struct fc_lport *lport,
 		if (lport->point_to_multipoint)
 			break;
 	case RPORT_ST_DELETE:
-		mutex_unlock(&rdata->rp_mutex);
+		spin_unlock(&rdata->rp_lock);
 		rjt_data.reason = ELS_RJT_FIP;
 		rjt_data.explan = ELS_EXPL_NOT_NEIGHBOR;
 		goto reject;
@@ -822,13 +822,13 @@ static void fc_rport_recv_flogi_req(struct fc_lport *lport,
 		 * This queues work to be sure exchanges are reset.
 		 */
 		fc_rport_enter_delete(rdata, RPORT_EV_LOGO);
-		mutex_unlock(&rdata->rp_mutex);
+		spin_unlock(&rdata->rp_lock);
 		rjt_data.reason = ELS_RJT_BUSY;
 		rjt_data.explan = ELS_EXPL_NONE;
 		goto reject;
 	}
 	if (fc_rport_login_complete(rdata, fp)) {
-		mutex_unlock(&rdata->rp_mutex);
+		spin_unlock(&rdata->rp_lock);
 		rjt_data.reason = ELS_RJT_LOGIC;
 		rjt_data.explan = ELS_EXPL_NONE;
 		goto reject;
@@ -850,7 +850,7 @@ static void fc_rport_recv_flogi_req(struct fc_lport *lport,
 	else
 		fc_rport_state_enter(rdata, RPORT_ST_PLOGI_WAIT);
 out:
-	mutex_unlock(&rdata->rp_mutex);
+	spin_unlock(&rdata->rp_lock);
 	mutex_unlock(&disc->disc_mutex);
 	fc_frame_free(rx_fp);
 	return;
@@ -881,7 +881,7 @@ static void fc_rport_plogi_resp(struct fc_seq *sp, struct fc_frame *fp,
 	u16 cssp_seq;
 	u8 op;
 
-	mutex_lock(&rdata->rp_mutex);
+	spin_lock(&rdata->rp_lock);
 
 	FC_RPORT_DBG(rdata, "Received a PLOGI %s\n", fc_els_resp_type(fp));
 
@@ -922,7 +922,7 @@ static void fc_rport_plogi_resp(struct fc_seq *sp, struct fc_frame *fp,
 out:
 	fc_frame_free(fp);
 err:
-	mutex_unlock(&rdata->rp_mutex);
+	spin_unlock(&rdata->rp_lock);
 	kref_put(&rdata->kref, rdata->local_port->tt.rport_destroy);
 }
 
@@ -1005,7 +1005,7 @@ static void fc_rport_prli_resp(struct fc_seq *sp, struct fc_frame *fp,
 	u8 op;
 	u8 resp_code = 0;
 
-	mutex_lock(&rdata->rp_mutex);
+	spin_lock(&rdata->rp_lock);
 
 	FC_RPORT_DBG(rdata, "Received a PRLI %s\n", fc_els_resp_type(fp));
 
@@ -1075,7 +1075,7 @@ static void fc_rport_prli_resp(struct fc_seq *sp, struct fc_frame *fp,
 out:
 	fc_frame_free(fp);
 err:
-	mutex_unlock(&rdata->rp_mutex);
+	spin_unlock(&rdata->rp_lock);
 	kref_put(&rdata->kref, rdata->local_port->tt.rport_destroy);
 }
 
@@ -1153,7 +1153,7 @@ static void fc_rport_rtv_resp(struct fc_seq *sp, struct fc_frame *fp,
 	struct fc_rport_priv *rdata = rdata_arg;
 	u8 op;
 
-	mutex_lock(&rdata->rp_mutex);
+	spin_lock(&rdata->rp_lock);
 
 	FC_RPORT_DBG(rdata, "Received a RTV %s\n", fc_els_resp_type(fp));
 
@@ -1197,7 +1197,7 @@ static void fc_rport_rtv_resp(struct fc_seq *sp, struct fc_frame *fp,
 out:
 	fc_frame_free(fp);
 err:
-	mutex_unlock(&rdata->rp_mutex);
+	spin_unlock(&rdata->rp_lock);
 	kref_put(&rdata->kref, rdata->local_port->tt.rport_destroy);
 }
 
@@ -1289,7 +1289,7 @@ static void fc_rport_adisc_resp(struct fc_seq *sp, struct fc_frame *fp,
 	struct fc_els_adisc *adisc;
 	u8 op;
 
-	mutex_lock(&rdata->rp_mutex);
+	spin_lock(&rdata->rp_lock);
 
 	FC_RPORT_DBG(rdata, "Received a ADISC response\n");
 
@@ -1326,7 +1326,7 @@ static void fc_rport_adisc_resp(struct fc_seq *sp, struct fc_frame *fp,
 out:
 	fc_frame_free(fp);
 err:
-	mutex_unlock(&rdata->rp_mutex);
+	spin_unlock(&rdata->rp_lock);
 	kref_put(&rdata->kref, rdata->local_port->tt.rport_destroy);
 }
 
@@ -1483,7 +1483,7 @@ static void fc_rport_recv_els_req(struct fc_lport *lport, struct fc_frame *fp)
 		mutex_unlock(&lport->disc.disc_mutex);
 		goto reject;
 	}
-	mutex_lock(&rdata->rp_mutex);
+	spin_lock(&rdata->rp_lock);
 	mutex_unlock(&lport->disc.disc_mutex);
 
 	switch (rdata->rp_state) {
@@ -1493,7 +1493,7 @@ static void fc_rport_recv_els_req(struct fc_lport *lport, struct fc_frame *fp)
 	case RPORT_ST_ADISC:
 		break;
 	default:
-		mutex_unlock(&rdata->rp_mutex);
+		spin_unlock(&rdata->rp_lock);
 		goto reject;
 	}
 
@@ -1523,7 +1523,7 @@ static void fc_rport_recv_els_req(struct fc_lport *lport, struct fc_frame *fp)
 		break;
 	}
 
-	mutex_unlock(&rdata->rp_mutex);
+	spin_unlock(&rdata->rp_lock);
 	return;
 
 reject:
@@ -1616,7 +1616,7 @@ static void fc_rport_recv_plogi_req(struct fc_lport *lport,
 		goto reject;
 	}
 
-	mutex_lock(&rdata->rp_mutex);
+	spin_lock(&rdata->rp_lock);
 	mutex_unlock(&disc->disc_mutex);
 
 	rdata->ids.port_name = get_unaligned_be64(&pl->fl_wwpn);
@@ -1643,7 +1643,7 @@ static void fc_rport_recv_plogi_req(struct fc_lport *lport,
 	case RPORT_ST_PLOGI:
 		FC_RPORT_DBG(rdata, "Received PLOGI in PLOGI state\n");
 		if (rdata->ids.port_name < lport->wwpn) {
-			mutex_unlock(&rdata->rp_mutex);
+			spin_unlock(&rdata->rp_lock);
 			rjt_data.reason = ELS_RJT_INPROG;
 			rjt_data.explan = ELS_EXPL_NONE;
 			goto reject;
@@ -1661,14 +1661,14 @@ static void fc_rport_recv_plogi_req(struct fc_lport *lport,
 	case RPORT_ST_DELETE:
 		FC_RPORT_DBG(rdata, "Received PLOGI in state %s - send busy\n",
 			     fc_rport_state(rdata));
-		mutex_unlock(&rdata->rp_mutex);
+		spin_unlock(&rdata->rp_lock);
 		rjt_data.reason = ELS_RJT_BUSY;
 		rjt_data.explan = ELS_EXPL_NONE;
 		goto reject;
 	}
 	if (!fc_rport_compatible_roles(lport, rdata)) {
 		FC_RPORT_DBG(rdata, "Received PLOGI for incompatible role\n");
-		mutex_unlock(&rdata->rp_mutex);
+		spin_unlock(&rdata->rp_lock);
 		rjt_data.reason = ELS_RJT_LOGIC;
 		rjt_data.explan = ELS_EXPL_NONE;
 		goto reject;
@@ -1691,7 +1691,7 @@ static void fc_rport_recv_plogi_req(struct fc_lport *lport,
 	lport->tt.frame_send(lport, fp);
 	fc_rport_enter_prli(rdata);
 out:
-	mutex_unlock(&rdata->rp_mutex);
+	spin_unlock(&rdata->rp_lock);
 	fc_frame_free(rx_fp);
 	return;
 
@@ -1910,12 +1910,12 @@ static void fc_rport_recv_logo_req(struct fc_lport *lport, struct fc_frame *fp)
 	mutex_lock(&lport->disc.disc_mutex);
 	rdata = lport->tt.rport_lookup(lport, sid);
 	if (rdata) {
-		mutex_lock(&rdata->rp_mutex);
+		spin_lock(&rdata->rp_lock);
 		FC_RPORT_DBG(rdata, "Received LOGO request while in state %s\n",
 			     fc_rport_state(rdata));
 
 		fc_rport_enter_delete(rdata, RPORT_EV_LOGO);
-		mutex_unlock(&rdata->rp_mutex);
+		spin_unlock(&rdata->rp_lock);
 	} else
 		FC_RPORT_ID_DBG(lport, sid,
 				"Received LOGO from non-logged-in port\n");
diff --git a/include/scsi/libfc.h b/include/scsi/libfc.h
index 93d14da..7d63d23 100644
--- a/include/scsi/libfc.h
+++ b/include/scsi/libfc.h
@@ -187,7 +187,7 @@ struct fc_rport_libfc_priv {
  * @major_retries:  The retry count for the entire PLOGI/PRLI state machine
  * @e_d_tov:        Error detect timeout value (in msec)
  * @r_a_tov:        Resource allocation timeout value (in msec)
- * @rp_mutex:       The mutex that protects the remote port
+ * @rp_lock:        The lock that protects the remote port
  * @retry_work:     Handle for retries
  * @event_callback: Callback when READY, FAILED or LOGO states complete
  * @prli_count:     Count of open PRLI sessions in providers
@@ -207,7 +207,7 @@ struct fc_rport_priv {
 	unsigned int	            major_retries;
 	unsigned int	            e_d_tov;
 	unsigned int	            r_a_tov;
-	struct mutex                rp_mutex;
+	spinlock_t		    rp_lock;
 	struct delayed_work	    retry_work;
 	enum fc_rport_event         event;
 	struct fc_rport_operations  *ops;
-- 
1.8.5.6


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH] libfc: replace 'rp_mutex' with 'rp_lock'
  2016-04-25  8:01 [PATCH] libfc: replace 'rp_mutex' with 'rp_lock' Hannes Reinecke
@ 2016-04-25 20:26 ` Ewan D. Milne
  2016-05-09 12:29 ` Johannes Thumshirn
  2016-05-10 18:33 ` Bart Van Assche
  2 siblings, 0 replies; 10+ messages in thread
From: Ewan D. Milne @ 2016-04-25 20:26 UTC (permalink / raw)
  To: Hannes Reinecke
  Cc: Martin K. Petersen, Christoph Hellwig, James Bottomley, linux-scsi

On Mon, 2016-04-25 at 10:01 +0200, Hannes Reinecke wrote:
> We cannot use an embedded mutex in a structure with reference
> counting, as mutex unlock might be delayed, and the waiters
> might then access an already freed memory area.
> So convert it to a spinlock.
> 
> For details cf https://lkml.org/lkml/2015/2/11/245
> 
> Signed-off-by: Hannes Reinecke <hare@suse.de>

There are comments at the beginning of fc_rport.c in the section
RPORT LOCKING that still refer to the rport mutex.  (I assume this is
really the fc_rport_priv mutex since fc_rport does not have a mutex.)
These comments should be updated if the locking is changed.  Also
see the comment about the mutex being held near fc_rport_enter_delete().

We may have a reproducible test case for our FCoE issues; I will
see if we can tell whether this fixes our problem.

-Ewan

> ---
>  drivers/scsi/libfc/fc_rport.c | 90 +++++++++++++++++++++----------------------
>  include/scsi/libfc.h          |  4 +-
>  2 files changed, 47 insertions(+), 47 deletions(-)
> 
> diff --git a/drivers/scsi/libfc/fc_rport.c b/drivers/scsi/libfc/fc_rport.c
> index 589ff9a..4520b5a 100644
> --- a/drivers/scsi/libfc/fc_rport.c
> +++ b/drivers/scsi/libfc/fc_rport.c
> @@ -136,7 +136,7 @@ static struct fc_rport_priv *fc_rport_create(struct fc_lport *lport,
>  	rdata->ids.roles = FC_RPORT_ROLE_UNKNOWN;
>  
>  	kref_init(&rdata->kref);
> -	mutex_init(&rdata->rp_mutex);
> +	spin_lock_init(&rdata->rp_lock);
>  	rdata->local_port = lport;
>  	rdata->rp_state = RPORT_ST_INIT;
>  	rdata->event = RPORT_EV_NONE;
> @@ -251,7 +251,7 @@ static void fc_rport_work(struct work_struct *work)
>  	struct fc4_prov *prov;
>  	u8 type;
>  
> -	mutex_lock(&rdata->rp_mutex);
> +	spin_lock(&rdata->rp_lock);
>  	event = rdata->event;
>  	rport_ops = rdata->ops;
>  	rport = rdata->rport;
> @@ -264,7 +264,7 @@ static void fc_rport_work(struct work_struct *work)
>  		rdata->event = RPORT_EV_NONE;
>  		rdata->major_retries = 0;
>  		kref_get(&rdata->kref);
> -		mutex_unlock(&rdata->rp_mutex);
> +		spin_unlock(&rdata->rp_lock);
>  
>  		if (!rport)
>  			rport = fc_remote_port_add(lport->host, 0, &ids);
> @@ -274,7 +274,7 @@ static void fc_rport_work(struct work_struct *work)
>  			kref_put(&rdata->kref, lport->tt.rport_destroy);
>  			return;
>  		}
> -		mutex_lock(&rdata->rp_mutex);
> +		spin_lock(&rdata->rp_lock);
>  		if (rdata->rport)
>  			FC_RPORT_DBG(rdata, "rport already allocated\n");
>  		rdata->rport = rport;
> @@ -287,7 +287,7 @@ static void fc_rport_work(struct work_struct *work)
>  		rpriv->flags = rdata->flags;
>  		rpriv->e_d_tov = rdata->e_d_tov;
>  		rpriv->r_a_tov = rdata->r_a_tov;
> -		mutex_unlock(&rdata->rp_mutex);
> +		spin_unlock(&rdata->rp_lock);
>  
>  		if (rport_ops && rport_ops->event_callback) {
>  			FC_RPORT_DBG(rdata, "callback ev %d\n", event);
> @@ -313,7 +313,7 @@ static void fc_rport_work(struct work_struct *work)
>  			mutex_unlock(&fc_prov_mutex);
>  		}
>  		port_id = rdata->ids.port_id;
> -		mutex_unlock(&rdata->rp_mutex);
> +		spin_unlock(&rdata->rp_lock);
>  
>  		if (rport_ops && rport_ops->event_callback) {
>  			FC_RPORT_DBG(rdata, "callback ev %d\n", event);
> @@ -334,18 +334,18 @@ static void fc_rport_work(struct work_struct *work)
>  		if (rport) {
>  			rpriv = rport->dd_data;
>  			rpriv->rp_state = RPORT_ST_DELETE;
> -			mutex_lock(&rdata->rp_mutex);
> +			spin_lock(&rdata->rp_lock);
>  			rdata->rport = NULL;
> -			mutex_unlock(&rdata->rp_mutex);
> +			spin_unlock(&rdata->rp_lock);
>  			fc_remote_port_delete(rport);
>  		}
>  
>  		mutex_lock(&lport->disc.disc_mutex);
> -		mutex_lock(&rdata->rp_mutex);
> +		spin_lock(&rdata->rp_lock);
>  		if (rdata->rp_state == RPORT_ST_DELETE) {
>  			if (port_id == FC_FID_DIR_SERV) {
>  				rdata->event = RPORT_EV_NONE;
> -				mutex_unlock(&rdata->rp_mutex);
> +				spin_unlock(&rdata->rp_lock);
>  				kref_put(&rdata->kref, lport->tt.rport_destroy);
>  			} else if ((rdata->flags & FC_RP_STARTED) &&
>  				   rdata->major_retries <
> @@ -354,11 +354,11 @@ static void fc_rport_work(struct work_struct *work)
>  				rdata->event = RPORT_EV_NONE;
>  				FC_RPORT_DBG(rdata, "work restart\n");
>  				fc_rport_enter_flogi(rdata);
> -				mutex_unlock(&rdata->rp_mutex);
> +				spin_unlock(&rdata->rp_lock);
>  			} else {
>  				FC_RPORT_DBG(rdata, "work delete\n");
>  				list_del_rcu(&rdata->peers);
> -				mutex_unlock(&rdata->rp_mutex);
> +				spin_unlock(&rdata->rp_lock);
>  				kref_put(&rdata->kref, lport->tt.rport_destroy);
>  			}
>  		} else {
> @@ -368,13 +368,13 @@ static void fc_rport_work(struct work_struct *work)
>  			rdata->event = RPORT_EV_NONE;
>  			if (rdata->rp_state == RPORT_ST_READY)
>  				fc_rport_enter_ready(rdata);
> -			mutex_unlock(&rdata->rp_mutex);
> +			spin_unlock(&rdata->rp_lock);
>  		}
>  		mutex_unlock(&lport->disc.disc_mutex);
>  		break;
>  
>  	default:
> -		mutex_unlock(&rdata->rp_mutex);
> +		spin_unlock(&rdata->rp_lock);
>  		break;
>  	}
>  }
> @@ -393,7 +393,7 @@ static void fc_rport_work(struct work_struct *work)
>   */
>  static int fc_rport_login(struct fc_rport_priv *rdata)
>  {
> -	mutex_lock(&rdata->rp_mutex);
> +	spin_lock(&rdata->rp_lock);
>  
>  	rdata->flags |= FC_RP_STARTED;
>  	switch (rdata->rp_state) {
> @@ -409,7 +409,7 @@ static int fc_rport_login(struct fc_rport_priv *rdata)
>  		fc_rport_enter_flogi(rdata);
>  		break;
>  	}
> -	mutex_unlock(&rdata->rp_mutex);
> +	spin_unlock(&rdata->rp_lock);
>  
>  	return 0;
>  }
> @@ -453,7 +453,7 @@ static void fc_rport_enter_delete(struct fc_rport_priv *rdata,
>   */
>  static int fc_rport_logoff(struct fc_rport_priv *rdata)
>  {
> -	mutex_lock(&rdata->rp_mutex);
> +	spin_lock(&rdata->rp_lock);
>  
>  	FC_RPORT_DBG(rdata, "Remove port\n");
>  
> @@ -470,7 +470,7 @@ static int fc_rport_logoff(struct fc_rport_priv *rdata)
>  	 */
>  	fc_rport_enter_delete(rdata, RPORT_EV_STOP);
>  out:
> -	mutex_unlock(&rdata->rp_mutex);
> +	spin_unlock(&rdata->rp_lock);
>  	return 0;
>  }
>  
> @@ -505,7 +505,7 @@ static void fc_rport_timeout(struct work_struct *work)
>  	struct fc_rport_priv *rdata =
>  		container_of(work, struct fc_rport_priv, retry_work.work);
>  
> -	mutex_lock(&rdata->rp_mutex);
> +	spin_lock(&rdata->rp_lock);
>  
>  	switch (rdata->rp_state) {
>  	case RPORT_ST_FLOGI:
> @@ -530,7 +530,7 @@ static void fc_rport_timeout(struct work_struct *work)
>  		break;
>  	}
>  
> -	mutex_unlock(&rdata->rp_mutex);
> +	spin_unlock(&rdata->rp_lock);
>  }
>  
>  /**
> @@ -666,7 +666,7 @@ static void fc_rport_flogi_resp(struct fc_seq *sp, struct fc_frame *fp,
>  	if (fp == ERR_PTR(-FC_EX_CLOSED))
>  		goto put;
>  
> -	mutex_lock(&rdata->rp_mutex);
> +	spin_lock(&rdata->rp_lock);
>  
>  	if (rdata->rp_state != RPORT_ST_FLOGI) {
>  		FC_RPORT_DBG(rdata, "Received a FLOGI response, but in state "
> @@ -700,7 +700,7 @@ static void fc_rport_flogi_resp(struct fc_seq *sp, struct fc_frame *fp,
>  out:
>  	fc_frame_free(fp);
>  err:
> -	mutex_unlock(&rdata->rp_mutex);
> +	spin_unlock(&rdata->rp_lock);
>  put:
>  	kref_put(&rdata->kref, rdata->local_port->tt.rport_destroy);
>  	return;
> @@ -783,7 +783,7 @@ static void fc_rport_recv_flogi_req(struct fc_lport *lport,
>  		rjt_data.explan = ELS_EXPL_NOT_NEIGHBOR;
>  		goto reject;
>  	}
> -	mutex_lock(&rdata->rp_mutex);
> +	spin_lock(&rdata->rp_lock);
>  
>  	FC_RPORT_DBG(rdata, "Received FLOGI in %s state\n",
>  		     fc_rport_state(rdata));
> @@ -805,7 +805,7 @@ static void fc_rport_recv_flogi_req(struct fc_lport *lport,
>  		if (lport->point_to_multipoint)
>  			break;
>  	case RPORT_ST_DELETE:
> -		mutex_unlock(&rdata->rp_mutex);
> +		spin_unlock(&rdata->rp_lock);
>  		rjt_data.reason = ELS_RJT_FIP;
>  		rjt_data.explan = ELS_EXPL_NOT_NEIGHBOR;
>  		goto reject;
> @@ -822,13 +822,13 @@ static void fc_rport_recv_flogi_req(struct fc_lport *lport,
>  		 * This queues work to be sure exchanges are reset.
>  		 */
>  		fc_rport_enter_delete(rdata, RPORT_EV_LOGO);
> -		mutex_unlock(&rdata->rp_mutex);
> +		spin_unlock(&rdata->rp_lock);
>  		rjt_data.reason = ELS_RJT_BUSY;
>  		rjt_data.explan = ELS_EXPL_NONE;
>  		goto reject;
>  	}
>  	if (fc_rport_login_complete(rdata, fp)) {
> -		mutex_unlock(&rdata->rp_mutex);
> +		spin_unlock(&rdata->rp_lock);
>  		rjt_data.reason = ELS_RJT_LOGIC;
>  		rjt_data.explan = ELS_EXPL_NONE;
>  		goto reject;
> @@ -850,7 +850,7 @@ static void fc_rport_recv_flogi_req(struct fc_lport *lport,
>  	else
>  		fc_rport_state_enter(rdata, RPORT_ST_PLOGI_WAIT);
>  out:
> -	mutex_unlock(&rdata->rp_mutex);
> +	spin_unlock(&rdata->rp_lock);
>  	mutex_unlock(&disc->disc_mutex);
>  	fc_frame_free(rx_fp);
>  	return;
> @@ -881,7 +881,7 @@ static void fc_rport_plogi_resp(struct fc_seq *sp, struct fc_frame *fp,
>  	u16 cssp_seq;
>  	u8 op;
>  
> -	mutex_lock(&rdata->rp_mutex);
> +	spin_lock(&rdata->rp_lock);
>  
>  	FC_RPORT_DBG(rdata, "Received a PLOGI %s\n", fc_els_resp_type(fp));
>  
> @@ -922,7 +922,7 @@ static void fc_rport_plogi_resp(struct fc_seq *sp, struct fc_frame *fp,
>  out:
>  	fc_frame_free(fp);
>  err:
> -	mutex_unlock(&rdata->rp_mutex);
> +	spin_unlock(&rdata->rp_lock);
>  	kref_put(&rdata->kref, rdata->local_port->tt.rport_destroy);
>  }
>  
> @@ -1005,7 +1005,7 @@ static void fc_rport_prli_resp(struct fc_seq *sp, struct fc_frame *fp,
>  	u8 op;
>  	u8 resp_code = 0;
>  
> -	mutex_lock(&rdata->rp_mutex);
> +	spin_lock(&rdata->rp_lock);
>  
>  	FC_RPORT_DBG(rdata, "Received a PRLI %s\n", fc_els_resp_type(fp));
>  
> @@ -1075,7 +1075,7 @@ static void fc_rport_prli_resp(struct fc_seq *sp, struct fc_frame *fp,
>  out:
>  	fc_frame_free(fp);
>  err:
> -	mutex_unlock(&rdata->rp_mutex);
> +	spin_unlock(&rdata->rp_lock);
>  	kref_put(&rdata->kref, rdata->local_port->tt.rport_destroy);
>  }
>  
> @@ -1153,7 +1153,7 @@ static void fc_rport_rtv_resp(struct fc_seq *sp, struct fc_frame *fp,
>  	struct fc_rport_priv *rdata = rdata_arg;
>  	u8 op;
>  
> -	mutex_lock(&rdata->rp_mutex);
> +	spin_lock(&rdata->rp_lock);
>  
>  	FC_RPORT_DBG(rdata, "Received a RTV %s\n", fc_els_resp_type(fp));
>  
> @@ -1197,7 +1197,7 @@ static void fc_rport_rtv_resp(struct fc_seq *sp, struct fc_frame *fp,
>  out:
>  	fc_frame_free(fp);
>  err:
> -	mutex_unlock(&rdata->rp_mutex);
> +	spin_unlock(&rdata->rp_lock);
>  	kref_put(&rdata->kref, rdata->local_port->tt.rport_destroy);
>  }
>  
> @@ -1289,7 +1289,7 @@ static void fc_rport_adisc_resp(struct fc_seq *sp, struct fc_frame *fp,
>  	struct fc_els_adisc *adisc;
>  	u8 op;
>  
> -	mutex_lock(&rdata->rp_mutex);
> +	spin_lock(&rdata->rp_lock);
>  
>  	FC_RPORT_DBG(rdata, "Received a ADISC response\n");
>  
> @@ -1326,7 +1326,7 @@ static void fc_rport_adisc_resp(struct fc_seq *sp, struct fc_frame *fp,
>  out:
>  	fc_frame_free(fp);
>  err:
> -	mutex_unlock(&rdata->rp_mutex);
> +	spin_unlock(&rdata->rp_lock);
>  	kref_put(&rdata->kref, rdata->local_port->tt.rport_destroy);
>  }
>  
> @@ -1483,7 +1483,7 @@ static void fc_rport_recv_els_req(struct fc_lport *lport, struct fc_frame *fp)
>  		mutex_unlock(&lport->disc.disc_mutex);
>  		goto reject;
>  	}
> -	mutex_lock(&rdata->rp_mutex);
> +	spin_lock(&rdata->rp_lock);
>  	mutex_unlock(&lport->disc.disc_mutex);
>  
>  	switch (rdata->rp_state) {
> @@ -1493,7 +1493,7 @@ static void fc_rport_recv_els_req(struct fc_lport *lport, struct fc_frame *fp)
>  	case RPORT_ST_ADISC:
>  		break;
>  	default:
> -		mutex_unlock(&rdata->rp_mutex);
> +		spin_unlock(&rdata->rp_lock);
>  		goto reject;
>  	}
>  
> @@ -1523,7 +1523,7 @@ static void fc_rport_recv_els_req(struct fc_lport *lport, struct fc_frame *fp)
>  		break;
>  	}
>  
> -	mutex_unlock(&rdata->rp_mutex);
> +	spin_unlock(&rdata->rp_lock);
>  	return;
>  
>  reject:
> @@ -1616,7 +1616,7 @@ static void fc_rport_recv_plogi_req(struct fc_lport *lport,
>  		goto reject;
>  	}
>  
> -	mutex_lock(&rdata->rp_mutex);
> +	spin_lock(&rdata->rp_lock);
>  	mutex_unlock(&disc->disc_mutex);
>  
>  	rdata->ids.port_name = get_unaligned_be64(&pl->fl_wwpn);
> @@ -1643,7 +1643,7 @@ static void fc_rport_recv_plogi_req(struct fc_lport *lport,
>  	case RPORT_ST_PLOGI:
>  		FC_RPORT_DBG(rdata, "Received PLOGI in PLOGI state\n");
>  		if (rdata->ids.port_name < lport->wwpn) {
> -			mutex_unlock(&rdata->rp_mutex);
> +			spin_unlock(&rdata->rp_lock);
>  			rjt_data.reason = ELS_RJT_INPROG;
>  			rjt_data.explan = ELS_EXPL_NONE;
>  			goto reject;
> @@ -1661,14 +1661,14 @@ static void fc_rport_recv_plogi_req(struct fc_lport *lport,
>  	case RPORT_ST_DELETE:
>  		FC_RPORT_DBG(rdata, "Received PLOGI in state %s - send busy\n",
>  			     fc_rport_state(rdata));
> -		mutex_unlock(&rdata->rp_mutex);
> +		spin_unlock(&rdata->rp_lock);
>  		rjt_data.reason = ELS_RJT_BUSY;
>  		rjt_data.explan = ELS_EXPL_NONE;
>  		goto reject;
>  	}
>  	if (!fc_rport_compatible_roles(lport, rdata)) {
>  		FC_RPORT_DBG(rdata, "Received PLOGI for incompatible role\n");
> -		mutex_unlock(&rdata->rp_mutex);
> +		spin_unlock(&rdata->rp_lock);
>  		rjt_data.reason = ELS_RJT_LOGIC;
>  		rjt_data.explan = ELS_EXPL_NONE;
>  		goto reject;
> @@ -1691,7 +1691,7 @@ static void fc_rport_recv_plogi_req(struct fc_lport *lport,
>  	lport->tt.frame_send(lport, fp);
>  	fc_rport_enter_prli(rdata);
>  out:
> -	mutex_unlock(&rdata->rp_mutex);
> +	spin_unlock(&rdata->rp_lock);
>  	fc_frame_free(rx_fp);
>  	return;
>  
> @@ -1910,12 +1910,12 @@ static void fc_rport_recv_logo_req(struct fc_lport *lport, struct fc_frame *fp)
>  	mutex_lock(&lport->disc.disc_mutex);
>  	rdata = lport->tt.rport_lookup(lport, sid);
>  	if (rdata) {
> -		mutex_lock(&rdata->rp_mutex);
> +		spin_lock(&rdata->rp_lock);
>  		FC_RPORT_DBG(rdata, "Received LOGO request while in state %s\n",
>  			     fc_rport_state(rdata));
>  
>  		fc_rport_enter_delete(rdata, RPORT_EV_LOGO);
> -		mutex_unlock(&rdata->rp_mutex);
> +		spin_unlock(&rdata->rp_lock);
>  	} else
>  		FC_RPORT_ID_DBG(lport, sid,
>  				"Received LOGO from non-logged-in port\n");
> diff --git a/include/scsi/libfc.h b/include/scsi/libfc.h
> index 93d14da..7d63d23 100644
> --- a/include/scsi/libfc.h
> +++ b/include/scsi/libfc.h
> @@ -187,7 +187,7 @@ struct fc_rport_libfc_priv {
>   * @major_retries:  The retry count for the entire PLOGI/PRLI state machine
>   * @e_d_tov:        Error detect timeout value (in msec)
>   * @r_a_tov:        Resource allocation timeout value (in msec)
> - * @rp_mutex:       The mutex that protects the remote port
> + * @rp_lock:        The lock that protects the remote port
>   * @retry_work:     Handle for retries
>   * @event_callback: Callback when READY, FAILED or LOGO states complete
>   * @prli_count:     Count of open PRLI sessions in providers
> @@ -207,7 +207,7 @@ struct fc_rport_priv {
>  	unsigned int	            major_retries;
>  	unsigned int	            e_d_tov;
>  	unsigned int	            r_a_tov;
> -	struct mutex                rp_mutex;
> +	spinlock_t		    rp_lock;
>  	struct delayed_work	    retry_work;
>  	enum fc_rport_event         event;
>  	struct fc_rport_operations  *ops;



^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] libfc: replace 'rp_mutex' with 'rp_lock'
  2016-04-25  8:01 [PATCH] libfc: replace 'rp_mutex' with 'rp_lock' Hannes Reinecke
  2016-04-25 20:26 ` Ewan D. Milne
@ 2016-05-09 12:29 ` Johannes Thumshirn
  2016-05-10 18:33 ` Bart Van Assche
  2 siblings, 0 replies; 10+ messages in thread
From: Johannes Thumshirn @ 2016-05-09 12:29 UTC (permalink / raw)
  To: Hannes Reinecke
  Cc: Martin K. Petersen, Christoph Hellwig, Ewan Milne,
	James Bottomley, linux-scsi

On Mon, Apr 25, 2016 at 10:01:33AM +0200, Hannes Reinecke wrote:
> We cannot use an embedded mutex in a structure with reference
> counting, as mutex unlock might be delayed, and the waiters
> might then access an already freed memory area.
> So convert it to a spinlock.
> 
> For details cf https://lkml.org/lkml/2015/2/11/245
> 
> Signed-off-by: Hannes Reinecke <hare@suse.de>

Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>

-- 
Johannes Thumshirn                                          Storage
jthumshirn@suse.de                                +49 911 74053 689
SUSE LINUX GmbH, Maxfeldstr. 5, 90409 Nürnberg
GF: Felix Imendörffer, Jane Smithard, Graham Norton
HRB 21284 (AG Nürnberg)
Key fingerprint = EC38 9CAB C2C4 F25D 8600 D0D0 0393 969D 2D76 0850
--
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] libfc: replace 'rp_mutex' with 'rp_lock'
  2016-04-25  8:01 [PATCH] libfc: replace 'rp_mutex' with 'rp_lock' Hannes Reinecke
  2016-04-25 20:26 ` Ewan D. Milne
  2016-05-09 12:29 ` Johannes Thumshirn
@ 2016-05-10 18:33 ` Bart Van Assche
  2016-05-11  1:48   ` Martin K. Petersen
  2016-05-11  5:49   ` Hannes Reinecke
  2 siblings, 2 replies; 10+ messages in thread
From: Bart Van Assche @ 2016-05-10 18:33 UTC (permalink / raw)
  To: Hannes Reinecke, Martin K. Petersen
  Cc: Christoph Hellwig, Ewan Milne, James Bottomley, linux-scsi

On 04/25/2016 01:01 AM, Hannes Reinecke wrote:
> We cannot use an embedded mutex in a structure with reference
> counting, as mutex unlock might be delayed, and the waiters
> might then access an already freed memory area.
> So convert it to a spinlock.
 >
 > For details cf https://lkml.org/lkml/2015/2/11/245

Hello Hannes,

Is what you describe a theoretical concern or have you observed any 
issues that could have been caused by the rport mutex? I'm asking this 
because my interpretation of the thread you refer to is different. My 
conclusion is that it is safe to embed a mutex in a structure that uses 
reference counting but that the mutex_unlock() call may trigger a 
spurious wakeup. I think that the conclusion of that thread was that 
glibc and kernel code should tolerate such spurious wakeups.

Bart.

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] libfc: replace 'rp_mutex' with 'rp_lock'
  2016-05-10 18:33 ` Bart Van Assche
@ 2016-05-11  1:48   ` Martin K. Petersen
  2016-05-11  5:49   ` Hannes Reinecke
  1 sibling, 0 replies; 10+ messages in thread
From: Martin K. Petersen @ 2016-05-11  1:48 UTC (permalink / raw)
  To: Bart Van Assche
  Cc: Hannes Reinecke, Martin K. Petersen, Christoph Hellwig,
	Ewan Milne, James Bottomley, linux-scsi

>>>>> "Bart" == Bart Van Assche <bart.vanassche@sandisk.com> writes:

Bart> Is what you describe a theoretical concern or have you observed
Bart> any issues that could have been caused by the rport mutex?

I believe Ewan had some data. Ewan?

-- 
Martin K. Petersen	Oracle Linux Engineering

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] libfc: replace 'rp_mutex' with 'rp_lock'
  2016-05-10 18:33 ` Bart Van Assche
  2016-05-11  1:48   ` Martin K. Petersen
@ 2016-05-11  5:49   ` Hannes Reinecke
  2016-05-11  6:07     ` Hannes Reinecke
  1 sibling, 1 reply; 10+ messages in thread
From: Hannes Reinecke @ 2016-05-11  5:49 UTC (permalink / raw)
  To: Bart Van Assche, Martin K. Petersen
  Cc: Christoph Hellwig, Ewan Milne, James Bottomley, linux-scsi

On 05/10/2016 08:33 PM, Bart Van Assche wrote:
> On 04/25/2016 01:01 AM, Hannes Reinecke wrote:
>> We cannot use an embedded mutex in a structure with reference
>> counting, as mutex unlock might be delayed, and the waiters
>> might then access an already freed memory area.
>> So convert it to a spinlock.
>>
>> For details cf https://lkml.org/lkml/2015/2/11/245
> 
> Hello Hannes,
> 
> Is what you describe a theoretical concern or have you observed any
> issues that could have been caused by the rport mutex? I'm asking
> this because my interpretation of the thread you refer to is
> different. My conclusion is that it is safe to embed a mutex in a
> structure that uses reference counting but that the mutex_unlock()
> call may trigger a spurious wakeup. I think that the conclusion of
> that thread was that glibc and kernel code should tolerate such
> spurious wakeups.
> 
We have several bugzillas referring to that specific code.
Most notably triggered when removing target ports with an open-fcoe
HBA.

And this patch seems to resolve it.
Read: with this patch the issue doesn't occur anymore.

Cheers,

Hannes
-- 
Dr. Hannes Reinecke		   Teamlead Storage & Networking
hare@suse.de			               +49 911 74053 688
SUSE LINUX GmbH, Maxfeldstr. 5, 90409 Nürnberg
GF: F. Imendörffer, J. Smithard, J. Guild, D. Upmanyu, G. Norton
HRB 21284 (AG Nürnberg)
--
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] libfc: replace 'rp_mutex' with 'rp_lock'
  2016-05-11  5:49   ` Hannes Reinecke
@ 2016-05-11  6:07     ` Hannes Reinecke
  2016-05-11 14:44       ` Bart Van Assche
  0 siblings, 1 reply; 10+ messages in thread
From: Hannes Reinecke @ 2016-05-11  6:07 UTC (permalink / raw)
  To: Bart Van Assche, Martin K. Petersen
  Cc: Christoph Hellwig, Ewan Milne, James Bottomley, linux-scsi

On 05/11/2016 07:49 AM, Hannes Reinecke wrote:
> On 05/10/2016 08:33 PM, Bart Van Assche wrote:
>> On 04/25/2016 01:01 AM, Hannes Reinecke wrote:
>>> We cannot use an embedded mutex in a structure with reference
>>> counting, as mutex unlock might be delayed, and the waiters
>>> might then access an already freed memory area.
>>> So convert it to a spinlock.
>>>
>>> For details cf https://lkml.org/lkml/2015/2/11/245
>>
>> Hello Hannes,
>>
>> Is what you describe a theoretical concern or have you observed any
>> issues that could have been caused by the rport mutex? I'm asking
>> this because my interpretation of the thread you refer to is
>> different. My conclusion is that it is safe to embed a mutex in a
>> structure that uses reference counting but that the mutex_unlock()
>> call may trigger a spurious wakeup. I think that the conclusion of
>> that thread was that glibc and kernel code should tolerate such
>> spurious wakeups.
>>
> We have several bugzillas referring to that specific code.
> Most notably triggered when removing target ports with an open-fcoe
> HBA.
> 
> And this patch seems to resolve it.
> Read: with this patch the issue doesn't occur anymore.
> 
And for the unbelievers here's the crash:

general protection fault: 0000 [#1] SMP
Modules linked in: raw rpcsec_gss_krb5 auth_rpcgss nfsv4 dns_reso
lver nfs lockd sunrpc fscache iscsi_ibft iscsi_boot_sysfs af_packet
xfs ext4 libcrc32c crc16 mbcache jbd2 joydev coretemp kvm_intel kvm
crct10dif_pclmul crc32_pclmul iTCO_wdt iTCO_vendor_support
aesni_intel aes_x86_64 lrw gf128mul glue_helper ablk_helper cryptd
pcspkr lpc_ich mfd_core enic ipmi_si ipmi_msghandler wmi shpchp
acpi_power_meter processor button ac hid_generic usbhid btrfs xor
raid6_pq mgag200 syscopyarea ehci_pci sysfillrect sysimgblt
i2c_algo_bit ehci_hcd drm_kms_helper ttm usbcore drm crc32c_intel
usb_common megaraid_sas dm_service_time sd_mod scsi_dh_rdac
scsi_dh_emc scsi_dh_alua fnic(OEX) libfcoe libfc scsi_transpor
t_fc scsi_tgt dm_multipath scsi_dh sg dm_mod scsi_mod autofs4
Supported: Yes, External
CPU: 0 PID: 16868 Comm: kworker/u25:2 Tainted: G        W  OE X 3
.12.55-52.42.1.10435.0.PTF.962846-default #1
Workqueue: fnic_event_wq fnic_handle_frame [fnic]
task: ffff88080021e140 ti: ffff8807cf076000 task.ti: ffff8807cf076000
RIP: 0010:[<ffffffffa00b2adb>]  [<ffffffffa00b2adb>]
fc_rport_lookup+0x4b/0x70 [libfc]
RSP: 0018:ffff8807cf077d20  EFLAGS: 00010202
RAX: 8500a090df03ff00 RBX: ffff8808560386f0 RCX: ffff880846351400
RDX: 8500a090df040000 RSI: 0000000000610c00 RDI: ffff880856038738
RBP: ffff8808560386f0 R08: 0000000000000001 R09: 0000000000000000
R10: ffff8808402f8e40 R11: ffff880856127600 R12: ffff8807cf077d78
R13: 0000000000610c00 R14: ffff880846351400 R15: 0000000000000000
FS:  0000000000000000(0000) GS:ffff88087fc00000(0000)
knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00007ff6a540f000 CR3: 0000000846831000 CR4: 00000000001407f0
Stack:
8500a090df040000 ffffffffa00b2e17 ffff8808042fe0c0 ffff8808560386f0
ffff8807cf077d78 ffff880856038750 ffffffffa00a9f81 ffff880800007530
ffff8808402f8e40 ffff880856127600 0000000000000001 ffff880846351408
Call Trace:
  [<ffffffffa00b2e17>] fc_rport_create+0x17/0x1b0 [libfc]
  [<ffffffffa00a9f81>] fc_disc_recv_req+0x261/0x480 [libfc]
  [<ffffffffa00b1008>] fc_lport_recv_els_req+0x68/0x130 [libfc]
  [<ffffffffa00afd5a>] fc_lport_recv_req+0x9a/0xf0 [libfc]
  [<ffffffffa00e8333>] fnic_handle_frame+0x63/0xd0 [fnic]
  [<ffffffff8106fd52>] process_one_work+0x172/0x420
  [<ffffffff810709ca>] worker_thread+0x11a/0x3c0
  [<ffffffff81077344>] kthread+0xb4/0xc0
  [<ffffffff81521318>] ret_from_fork+0x58/0x90
Code: ff ff ff 75 26 eb 39 66 0f 1f 84 00 00 00 00 00 48 8b 80 00
 01 00 00 48 89 04 24 48 8b 14 24 48 39 d7 48 8d 82 00 ff ff ff 74
15 <39> b2 28 ff ff ff 75 dd 48 83 c4 08 c3 0f 1f 84 00 00 00 00 00
RIP  [<ffffffffa00b2adb>] fc_rport_lookup+0x4b/0x70 [libfc]

So no, it's not theoretical.

Cheers,

Hannes
-- 
Dr. Hannes Reinecke		   Teamlead Storage & Networking
hare@suse.de			               +49 911 74053 688
SUSE LINUX GmbH, Maxfeldstr. 5, 90409 Nürnberg
GF: F. Imendörffer, J. Smithard, J. Guild, D. Upmanyu, G. Norton
HRB 21284 (AG Nürnberg)
--
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] libfc: replace 'rp_mutex' with 'rp_lock'
  2016-05-11  6:07     ` Hannes Reinecke
@ 2016-05-11 14:44       ` Bart Van Assche
  2016-05-17  6:46         ` Hannes Reinecke
  0 siblings, 1 reply; 10+ messages in thread
From: Bart Van Assche @ 2016-05-11 14:44 UTC (permalink / raw)
  To: Hannes Reinecke, Martin K. Petersen
  Cc: Christoph Hellwig, Ewan Milne, James Bottomley, linux-scsi

On 05/10/16 23:07, Hannes Reinecke wrote:
> On 05/11/2016 07:49 AM, Hannes Reinecke wrote:
> RIP: 0010:[<ffffffffa00b2adb>]  [<ffffffffa00b2adb>]
> fc_rport_lookup+0x4b/0x70 [libfc]
> Call Trace:
>    [<ffffffffa00b2e17>] fc_rport_create+0x17/0x1b0 [libfc]
>    [<ffffffffa00a9f81>] fc_disc_recv_req+0x261/0x480 [libfc]
>    [<ffffffffa00b1008>] fc_lport_recv_els_req+0x68/0x130 [libfc]
>    [<ffffffffa00afd5a>] fc_lport_recv_req+0x9a/0xf0 [libfc]
>    [<ffffffffa00e8333>] fnic_handle_frame+0x63/0xd0 [fnic]
>    [<ffffffff8106fd52>] process_one_work+0x172/0x420
>    [<ffffffff810709ca>] worker_thread+0x11a/0x3c0
>    [<ffffffff81077344>] kthread+0xb4/0xc0
>    [<ffffffff81521318>] ret_from_fork+0x58/0x90

Hello Hannes,

Thanks for sharing this information. fc_disc_recv_req() protects the 
fc_rport_create() call via a mutex (disc_mutex). Since a mutex_lock() 
call may sleep it can trigger the start of an RCU grace period. I think 
this may result in freeing of an rport while fc_rport_lookup() is 
examining it. Have you already considered to add a 
rcu_read_lock()/rcu_read_unlock() pair in fc_rport_lookup()?

Thanks,

Bart.

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] libfc: replace 'rp_mutex' with 'rp_lock'
  2016-05-11 14:44       ` Bart Van Assche
@ 2016-05-17  6:46         ` Hannes Reinecke
  2016-05-17 18:04           ` Bart Van Assche
  0 siblings, 1 reply; 10+ messages in thread
From: Hannes Reinecke @ 2016-05-17  6:46 UTC (permalink / raw)
  To: Bart Van Assche, Martin K. Petersen
  Cc: Christoph Hellwig, Ewan Milne, James Bottomley, linux-scsi

On 05/11/2016 04:44 PM, Bart Van Assche wrote:
> On 05/10/16 23:07, Hannes Reinecke wrote:
>> On 05/11/2016 07:49 AM, Hannes Reinecke wrote:
>> RIP: 0010:[<ffffffffa00b2adb>]  [<ffffffffa00b2adb>]
>> fc_rport_lookup+0x4b/0x70 [libfc]
>> Call Trace:
>>    [<ffffffffa00b2e17>] fc_rport_create+0x17/0x1b0 [libfc]
>>    [<ffffffffa00a9f81>] fc_disc_recv_req+0x261/0x480 [libfc]
>>    [<ffffffffa00b1008>] fc_lport_recv_els_req+0x68/0x130 [libfc]
>>    [<ffffffffa00afd5a>] fc_lport_recv_req+0x9a/0xf0 [libfc]
>>    [<ffffffffa00e8333>] fnic_handle_frame+0x63/0xd0 [fnic]
>>    [<ffffffff8106fd52>] process_one_work+0x172/0x420
>>    [<ffffffff810709ca>] worker_thread+0x11a/0x3c0
>>    [<ffffffff81077344>] kthread+0xb4/0xc0
>>    [<ffffffff81521318>] ret_from_fork+0x58/0x90
>
> Hello Hannes,
>
> Thanks for sharing this information. fc_disc_recv_req() protects the
> fc_rport_create() call via a mutex (disc_mutex). Since a mutex_lock()
> call may sleep it can trigger the start of an RCU grace period. I think
> this may result in freeing of an rport while fc_rport_lookup() is
> examining it. Have you already considered to add a
> rcu_read_lock()/rcu_read_unlock() pair in fc_rport_lookup()?
>
No, I haven't so far.
This issue is hard to trigger, and I've only got the customer report to 
go by.

Also, when using an rcu_read_lock() here one probably needs to revisit 
the entire locking structure in libfc:
rport list is an RCU-protected list, so in principle one only needs to 
hold the rport mutex when _assigning_ new rports, and _could_ drop the 
mutex usage for a simple lookup.
But this really needs some more thought and testing, so I haven't 
attempted it.

Also, with your suggestion I would need to take the mutex_lock _and_ 
call rcu_read_lock(), which just looks wrong.
Hence I prefer to use the spin_lock method (which, incidentally, is also 
suggested in the RCU documentation).

Cheers,

Hannes
-- 
Dr. Hannes Reinecke		      zSeries & Storage
hare@suse.de			      +49 911 74053 688
SUSE LINUX Products GmbH, Maxfeldstr. 5, 90409 Nürnberg
GF: J. Hawn, J. Guild, F. Imendörffer, HRB 16746 (AG Nürnberg)
--
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] libfc: replace 'rp_mutex' with 'rp_lock'
  2016-05-17  6:46         ` Hannes Reinecke
@ 2016-05-17 18:04           ` Bart Van Assche
  0 siblings, 0 replies; 10+ messages in thread
From: Bart Van Assche @ 2016-05-17 18:04 UTC (permalink / raw)
  To: Hannes Reinecke, Martin K. Petersen
  Cc: Christoph Hellwig, Ewan Milne, James Bottomley, linux-scsi

On 05/16/2016 11:46 PM, Hannes Reinecke wrote:
> On 05/11/2016 04:44 PM, Bart Van Assche wrote:
>> On 05/10/16 23:07, Hannes Reinecke wrote:
>>> On 05/11/2016 07:49 AM, Hannes Reinecke wrote:
>>> RIP: 0010:[<ffffffffa00b2adb>]  [<ffffffffa00b2adb>]
>>> fc_rport_lookup+0x4b/0x70 [libfc]
>>> Call Trace:
>>>    [<ffffffffa00b2e17>] fc_rport_create+0x17/0x1b0 [libfc]
>>>    [<ffffffffa00a9f81>] fc_disc_recv_req+0x261/0x480 [libfc]
>>>    [<ffffffffa00b1008>] fc_lport_recv_els_req+0x68/0x130 [libfc]
>>>    [<ffffffffa00afd5a>] fc_lport_recv_req+0x9a/0xf0 [libfc]
>>>    [<ffffffffa00e8333>] fnic_handle_frame+0x63/0xd0 [fnic]
>>>    [<ffffffff8106fd52>] process_one_work+0x172/0x420
>>>    [<ffffffff810709ca>] worker_thread+0x11a/0x3c0
>>>    [<ffffffff81077344>] kthread+0xb4/0xc0
>>>    [<ffffffff81521318>] ret_from_fork+0x58/0x90
>>
>> Hello Hannes,
>>
>> Thanks for sharing this information. fc_disc_recv_req() protects the
>> fc_rport_create() call via a mutex (disc_mutex). Since a mutex_lock()
>> call may sleep it can trigger the start of an RCU grace period. I think
>> this may result in freeing of an rport while fc_rport_lookup() is
>> examining it. Have you already considered to add a
>> rcu_read_lock()/rcu_read_unlock() pair in fc_rport_lookup()?
>>
> No, I haven't so far.
> This issue is hard to trigger, and I've only got the customer report to
> go by.
>
> Also, when using an rcu_read_lock() here one probably needs to revisit
> the entire locking structure in libfc:
> rport list is an RCU-protected list, so in principle one only needs to
> hold the rport mutex when _assigning_ new rports, and _could_ drop the
> mutex usage for a simple lookup.
> But this really needs some more thought and testing, so I haven't
> attempted it.
>
> Also, with your suggestion I would need to take the mutex_lock _and_
> call rcu_read_lock(), which just looks wrong.
> Hence I prefer to use the spin_lock method (which, incidentally, is also
> suggested in the RCU documentation).

Hello Hannes,

I think the following section from Documentation/RCU/checklist.txt 
applies to the code in fc_rport_lookup():

        "Do the RCU read-side critical sections make proper use of
	rcu_read_lock() and friends?  These primitives are needed
	to prevent grace periods from ending prematurely, which
	could result in data being unceremoniously freed out from
	under your read-side code, which can greatly increase the
	actuarial risk of your kernel."

Thanks,

Bart.

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2016-05-17 18:04 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-04-25  8:01 [PATCH] libfc: replace 'rp_mutex' with 'rp_lock' Hannes Reinecke
2016-04-25 20:26 ` Ewan D. Milne
2016-05-09 12:29 ` Johannes Thumshirn
2016-05-10 18:33 ` Bart Van Assche
2016-05-11  1:48   ` Martin K. Petersen
2016-05-11  5:49   ` Hannes Reinecke
2016-05-11  6:07     ` Hannes Reinecke
2016-05-11 14:44       ` Bart Van Assche
2016-05-17  6:46         ` Hannes Reinecke
2016-05-17 18:04           ` Bart Van Assche

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.