linux-rdma.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 1/2] IB/ipoib: Clean up send-only multicast joins
@ 2015-08-21 23:34 Jason Gunthorpe
       [not found] ` <1440200053-18890-1-git-send-email-jgunthorpe-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
  0 siblings, 1 reply; 22+ messages in thread
From: Jason Gunthorpe @ 2015-08-21 23:34 UTC (permalink / raw)
  To: Doug Ledford, linux-rdma-u79uwXL29TY76Z2rM5mHXA

Even though we don't expect the group to be created by the SM we
sill need to provide all the parameters to force the SM to validate
they are correct.

Signed-off-by: Jason Gunthorpe <jgunthorpe-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
---
 drivers/infiniband/ulp/ipoib/ipoib_multicast.c | 47 +++++++++++++++++---------
 1 file changed, 31 insertions(+), 16 deletions(-)

diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
index 0d23e0568deb..c0e702c577d5 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -448,8 +448,8 @@ out_locked:
 	return status;
 }
 
-static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast,
-			     int create)
+/* priv->lock must be held when calling */
+static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 	struct ib_sa_multicast *multicast;
@@ -471,7 +471,14 @@ static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast,
 		IB_SA_MCMEMBER_REC_PKEY		|
 		IB_SA_MCMEMBER_REC_JOIN_STATE;
 
-	if (create) {
+	if (mcast != priv->broadcast) {
+		/*
+		 * RFC 4391:
+		 *  The MGID MUST use the same P_Key, Q_Key, SL, MTU,
+		 *  and HopLimit as those used in the broadcast-GID.  The rest
+		 *  of attributes SHOULD follow the values used in the
+		 *  broadcast-GID as well.
+		 */
 		comp_mask |=
 			IB_SA_MCMEMBER_REC_QKEY			|
 			IB_SA_MCMEMBER_REC_MTU_SELECTOR		|
@@ -492,19 +499,35 @@ static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast,
 		rec.sl		  = priv->broadcast->mcmember.sl;
 		rec.flow_label	  = priv->broadcast->mcmember.flow_label;
 		rec.hop_limit	  = priv->broadcast->mcmember.hop_limit;
+
+		/*
+		 * Historically Linux IPoIB has never properly supported SEND
+		 * ONLY join. It emulated it by not providing all the required
+		 * attributes, which is enough to prevent group creation and
+		 * detect if there are full members or not. A major problem
+		 * with supporting SEND ONLY is detecting when the group is
+		 * auto-destroyed as IPoIB will cache the MLID..
+		 */
+#if 1
+		if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags))
+			comp_mask &= ~IB_SA_MCMEMBER_REC_TRAFFIC_CLASS;
+#else
+		if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags))
+			rec.join_state = 4;
+#endif
 	}
 
+	spin_unlock_irq(&priv->lock);
 	multicast = ib_sa_join_multicast(&ipoib_sa_client, priv->ca, priv->port,
 					 &rec, comp_mask, GFP_KERNEL,
 					 ipoib_mcast_join_complete, mcast);
+	spin_lock_irq(&priv->lock);
 	if (IS_ERR(multicast)) {
 		ret = PTR_ERR(multicast);
 		ipoib_warn(priv, "ib_sa_join_multicast failed, status %d\n", ret);
-		spin_lock_irq(&priv->lock);
 		/* Requeue this join task with a backoff delay */
 		__ipoib_mcast_schedule_join_thread(priv, mcast, 1);
 		clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
-		spin_unlock_irq(&priv->lock);
 		complete(&mcast->done);
 	}
 }
@@ -517,7 +540,6 @@ void ipoib_mcast_join_task(struct work_struct *work)
 	struct ib_port_attr port_attr;
 	unsigned long delay_until = 0;
 	struct ipoib_mcast *mcast = NULL;
-	int create = 1;
 
 	if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags))
 		return;
@@ -566,7 +588,6 @@ void ipoib_mcast_join_task(struct work_struct *work)
 		if (IS_ERR_OR_NULL(priv->broadcast->mc) &&
 		    !test_bit(IPOIB_MCAST_FLAG_BUSY, &priv->broadcast->flags)) {
 			mcast = priv->broadcast;
-			create = 0;
 			if (mcast->backoff > 1 &&
 			    time_before(jiffies, mcast->delay_until)) {
 				delay_until = mcast->delay_until;
@@ -590,13 +611,7 @@ void ipoib_mcast_join_task(struct work_struct *work)
 				/* Found the next unjoined group */
 				init_completion(&mcast->done);
 				set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
-				if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags))
-					create = 0;
-				else
-					create = 1;
-				spin_unlock_irq(&priv->lock);
-				ipoib_mcast_join(dev, mcast, create);
-				spin_lock_irq(&priv->lock);
+				ipoib_mcast_join(dev, mcast);
 			} else if (!delay_until ||
 				 time_before(mcast->delay_until, delay_until))
 				delay_until = mcast->delay_until;
@@ -616,9 +631,9 @@ out:
 		init_completion(&mcast->done);
 		set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
 	}
-	spin_unlock_irq(&priv->lock);
 	if (mcast)
-		ipoib_mcast_join(dev, mcast, create);
+		ipoib_mcast_join(dev, mcast);
+	spin_unlock_irq(&priv->lock);
 }
 
 int ipoib_mcast_start_thread(struct net_device *dev)
-- 
2.1.4

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH 2/2] IB/ipoib: Suppress warning for send only join failures
       [not found] ` <1440200053-18890-1-git-send-email-jgunthorpe-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
@ 2015-08-21 23:34   ` Jason Gunthorpe
       [not found]     ` <1440200053-18890-2-git-send-email-jgunthorpe-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
  2015-08-25 12:58   ` [PATCH 1/2] IB/ipoib: Clean up send-only multicast joins Hal Rosenstock
                     ` (2 subsequent siblings)
  3 siblings, 1 reply; 22+ messages in thread
From: Jason Gunthorpe @ 2015-08-21 23:34 UTC (permalink / raw)
  To: Doug Ledford, linux-rdma-u79uwXL29TY76Z2rM5mHXA

We expect send only joins to fail, it just means there are no listeners
for the group. The correct thing to do is silently drop the packet
at source.

Eg avahi will full join 224.0.0.251 which causes a send only IGMP packet
to 224.0.0.22, and then a warning level kmessage like this:

 ib0: sendonly multicast join failed for ff12:401b:ffff:0000:0000:0000:0000:0016, status -22

If there is no IP router listening to IGMP.

Signed-off-by: Jason Gunthorpe <jgunthorpe-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
---
 drivers/infiniband/ulp/ipoib/ipoib_multicast.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
index c0e702c577d5..2d43ec542b63 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -393,8 +393,13 @@ static int ipoib_mcast_join_complete(int status,
 			goto out_locked;
 		}
 	} else {
-		if (mcast->logcount++ < 20) {
-			if (status == -ETIMEDOUT || status == -EAGAIN) {
+		bool silent_fail =
+		    test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags) &&
+		    status == -EINVAL;
+
+		if (mcast->logcount < 20) {
+			if (status == -ETIMEDOUT || status == -EAGAIN ||
+			    silent_fail) {
 				ipoib_dbg_mcast(priv, "%smulticast join failed for %pI6, status %d\n",
 						test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags) ? "sendonly " : "",
 						mcast->mcmember.mgid.raw, status);
@@ -403,6 +408,9 @@ static int ipoib_mcast_join_complete(int status,
 						test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags) ? "sendonly " : "",
 					   mcast->mcmember.mgid.raw, status);
 			}
+
+			if (!silent_fail)
+				mcast->logcount++;
 		}
 
 		if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags) &&
-- 
2.1.4

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* Re: [PATCH 1/2] IB/ipoib: Clean up send-only multicast joins
       [not found] ` <1440200053-18890-1-git-send-email-jgunthorpe-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
  2015-08-21 23:34   ` [PATCH 2/2] IB/ipoib: Suppress warning for send only join failures Jason Gunthorpe
@ 2015-08-25 12:58   ` Hal Rosenstock
       [not found]     ` <55DC667A.30208-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>
  2015-08-25 17:50   ` Doug Ledford
  2015-09-03 16:50   ` Christoph Lameter
  3 siblings, 1 reply; 22+ messages in thread
From: Hal Rosenstock @ 2015-08-25 12:58 UTC (permalink / raw)
  To: Jason Gunthorpe; +Cc: Doug Ledford, linux-rdma-u79uwXL29TY76Z2rM5mHXA

On 8/21/2015 7:34 PM, Jason Gunthorpe wrote:
> Even though we don't expect the group to be created by the SM we
> sill need to provide all the parameters to force the SM to validate
> they are correct.

Out of curiosity, has it been observed that there was inconsistency in
these additional IPoIB parameters between broadcast and non broadcast
groups on client side or is this just to be defensive to make sure this
does not occur ?

-- Hal

> 
> Signed-off-by: Jason Gunthorpe <jgunthorpe-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
> ---
>  drivers/infiniband/ulp/ipoib/ipoib_multicast.c | 47 +++++++++++++++++---------
>  1 file changed, 31 insertions(+), 16 deletions(-)
> 
> diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
> index 0d23e0568deb..c0e702c577d5 100644
> --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
> +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
> @@ -448,8 +448,8 @@ out_locked:
>  	return status;
>  }
>  
> -static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast,
> -			     int create)
> +/* priv->lock must be held when calling */
> +static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast)
>  {
>  	struct ipoib_dev_priv *priv = netdev_priv(dev);
>  	struct ib_sa_multicast *multicast;
> @@ -471,7 +471,14 @@ static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast,
>  		IB_SA_MCMEMBER_REC_PKEY		|
>  		IB_SA_MCMEMBER_REC_JOIN_STATE;
>  
> -	if (create) {
> +	if (mcast != priv->broadcast) {
> +		/*
> +		 * RFC 4391:
> +		 *  The MGID MUST use the same P_Key, Q_Key, SL, MTU,
> +		 *  and HopLimit as those used in the broadcast-GID.  The rest
> +		 *  of attributes SHOULD follow the values used in the
> +		 *  broadcast-GID as well.
> +		 */
>  		comp_mask |=
>  			IB_SA_MCMEMBER_REC_QKEY			|
>  			IB_SA_MCMEMBER_REC_MTU_SELECTOR		|
> @@ -492,19 +499,35 @@ static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast,
>  		rec.sl		  = priv->broadcast->mcmember.sl;
>  		rec.flow_label	  = priv->broadcast->mcmember.flow_label;
>  		rec.hop_limit	  = priv->broadcast->mcmember.hop_limit;
> +
> +		/*
> +		 * Historically Linux IPoIB has never properly supported SEND
> +		 * ONLY join. It emulated it by not providing all the required
> +		 * attributes, which is enough to prevent group creation and
> +		 * detect if there are full members or not. A major problem
> +		 * with supporting SEND ONLY is detecting when the group is
> +		 * auto-destroyed as IPoIB will cache the MLID..
> +		 */
> +#if 1
> +		if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags))
> +			comp_mask &= ~IB_SA_MCMEMBER_REC_TRAFFIC_CLASS;
> +#else
> +		if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags))
> +			rec.join_state = 4;
> +#endif
>  	}
>  
> +	spin_unlock_irq(&priv->lock);
>  	multicast = ib_sa_join_multicast(&ipoib_sa_client, priv->ca, priv->port,
>  					 &rec, comp_mask, GFP_KERNEL,
>  					 ipoib_mcast_join_complete, mcast);
> +	spin_lock_irq(&priv->lock);
>  	if (IS_ERR(multicast)) {
>  		ret = PTR_ERR(multicast);
>  		ipoib_warn(priv, "ib_sa_join_multicast failed, status %d\n", ret);
> -		spin_lock_irq(&priv->lock);
>  		/* Requeue this join task with a backoff delay */
>  		__ipoib_mcast_schedule_join_thread(priv, mcast, 1);
>  		clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
> -		spin_unlock_irq(&priv->lock);
>  		complete(&mcast->done);
>  	}
>  }
> @@ -517,7 +540,6 @@ void ipoib_mcast_join_task(struct work_struct *work)
>  	struct ib_port_attr port_attr;
>  	unsigned long delay_until = 0;
>  	struct ipoib_mcast *mcast = NULL;
> -	int create = 1;
>  
>  	if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags))
>  		return;
> @@ -566,7 +588,6 @@ void ipoib_mcast_join_task(struct work_struct *work)
>  		if (IS_ERR_OR_NULL(priv->broadcast->mc) &&
>  		    !test_bit(IPOIB_MCAST_FLAG_BUSY, &priv->broadcast->flags)) {
>  			mcast = priv->broadcast;
> -			create = 0;
>  			if (mcast->backoff > 1 &&
>  			    time_before(jiffies, mcast->delay_until)) {
>  				delay_until = mcast->delay_until;
> @@ -590,13 +611,7 @@ void ipoib_mcast_join_task(struct work_struct *work)
>  				/* Found the next unjoined group */
>  				init_completion(&mcast->done);
>  				set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
> -				if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags))
> -					create = 0;
> -				else
> -					create = 1;
> -				spin_unlock_irq(&priv->lock);
> -				ipoib_mcast_join(dev, mcast, create);
> -				spin_lock_irq(&priv->lock);
> +				ipoib_mcast_join(dev, mcast);
>  			} else if (!delay_until ||
>  				 time_before(mcast->delay_until, delay_until))
>  				delay_until = mcast->delay_until;
> @@ -616,9 +631,9 @@ out:
>  		init_completion(&mcast->done);
>  		set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
>  	}
> -	spin_unlock_irq(&priv->lock);
>  	if (mcast)
> -		ipoib_mcast_join(dev, mcast, create);
> +		ipoib_mcast_join(dev, mcast);
> +	spin_unlock_irq(&priv->lock);
>  }
>  
>  int ipoib_mcast_start_thread(struct net_device *dev)

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 2/2] IB/ipoib: Suppress warning for send only join failures
       [not found]     ` <1440200053-18890-2-git-send-email-jgunthorpe-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
@ 2015-08-25 12:59       ` Hal Rosenstock
       [not found]         ` <55DC66A1.9040207-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>
  2015-09-03 21:20       ` Doug Ledford
  1 sibling, 1 reply; 22+ messages in thread
From: Hal Rosenstock @ 2015-08-25 12:59 UTC (permalink / raw)
  To: Jason Gunthorpe; +Cc: Doug Ledford, linux-rdma-u79uwXL29TY76Z2rM5mHXA

On 8/21/2015 7:34 PM, Jason Gunthorpe wrote:
> We expect send only joins to fail, it just means there are no listeners
> for the group. The correct thing to do is silently drop the packet
> at source.
> 
> Eg avahi will full join 224.0.0.251 which causes a send only IGMP packet
> to 224.0.0.22, and then a warning level kmessage like this:
> 
>  ib0: sendonly multicast join failed for ff12:401b:ffff:0000:0000:0000:0000:0016, status -22
> 
> If there is no IP router listening to IGMP.
> 
> Signed-off-by: Jason Gunthorpe <jgunthorpe-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
> ---
>  drivers/infiniband/ulp/ipoib/ipoib_multicast.c | 12 ++++++++++--
>  1 file changed, 10 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
> index c0e702c577d5..2d43ec542b63 100644
> --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
> +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
> @@ -393,8 +393,13 @@ static int ipoib_mcast_join_complete(int status,
>  			goto out_locked;
>  		}
>  	} else {
> -		if (mcast->logcount++ < 20) {
> -			if (status == -ETIMEDOUT || status == -EAGAIN) {
> +		bool silent_fail =
> +		    test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags) &&
> +		    status == -EINVAL;

Aren't there other reasons that send only join might have EINVAL
indicated ? Maybe it's better to be overly silent rather than overly
verbose as to not spam the log but it seems like it would make debug of
such cases harder.

> +
> +		if (mcast->logcount < 20) {
> +			if (status == -ETIMEDOUT || status == -EAGAIN ||
> +			    silent_fail) {
>  				ipoib_dbg_mcast(priv, "%smulticast join failed for %pI6, status %d\n",
>  						test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags) ? "sendonly " : "",
>  						mcast->mcmember.mgid.raw, status);

ipoib_dbg_mcast logging is conditionalized on CONFIG_INFINIBAND_IPOIB_DEBUG

> @@ -403,6 +408,9 @@ static int ipoib_mcast_join_complete(int status,
>  						test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags) ? "sendonly " : "",
>  					   mcast->mcmember.mgid.raw, status);
>  			}
> +
> +			if (!silent_fail)
> +				mcast->logcount++;
>  		}
>  
>  		if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags) &&

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 1/2] IB/ipoib: Clean up send-only multicast joins
       [not found]     ` <55DC667A.30208-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>
@ 2015-08-25 16:24       ` Jason Gunthorpe
  0 siblings, 0 replies; 22+ messages in thread
From: Jason Gunthorpe @ 2015-08-25 16:24 UTC (permalink / raw)
  To: Hal Rosenstock; +Cc: Doug Ledford, linux-rdma-u79uwXL29TY76Z2rM5mHXA

On Tue, Aug 25, 2015 at 08:58:34AM -0400, Hal Rosenstock wrote:
> On 8/21/2015 7:34 PM, Jason Gunthorpe wrote:
> > Even though we don't expect the group to be created by the SM we
> > sill need to provide all the parameters to force the SM to validate
> > they are correct.
> 
> Out of curiosity, has it been observed that there was inconsistency in
> these additional IPoIB parameters between broadcast and non broadcast
> groups on client side or is this just to be defensive to make sure this
> does not occur ?

Not that I've seen. This is one half a code clean up, one half
better following the spec..

Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 2/2] IB/ipoib: Suppress warning for send only join failures
       [not found]         ` <55DC66A1.9040207-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>
@ 2015-08-25 16:28           ` Jason Gunthorpe
       [not found]             ` <20150825162856.GC4425-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
  0 siblings, 1 reply; 22+ messages in thread
From: Jason Gunthorpe @ 2015-08-25 16:28 UTC (permalink / raw)
  To: Hal Rosenstock; +Cc: Doug Ledford, linux-rdma-u79uwXL29TY76Z2rM5mHXA

On Tue, Aug 25, 2015 at 08:59:13AM -0400, Hal Rosenstock wrote:
> > -		if (mcast->logcount++ < 20) {
> > -			if (status == -ETIMEDOUT || status == -EAGAIN) {
> > +		bool silent_fail =
> > +		    test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags) &&
> > +		    status == -EINVAL;
> 
> Aren't there other reasons that send only join might have EINVAL
> indicated ?

Not sure, the layers below all eat the detailed error code. Hopefully
EINVAL isn't re-used.

> Maybe it's better to be overly silent rather than overly
> verbose as to not spam the log but it seems like it would make debug of
> such cases harder.

It makes debugging harder to have worthless messages because they
obscure what is going on. The first time I saw this I assumed there
was an issue, but it turns out to be an expected failure.

The other issue is the way the rate limiting works:

> > +		if (mcast->logcount < 20) {
> > +			if (status == -ETIMEDOUT || status == -EAGAIN ||
> > +			    silent_fail) {
> >  				ipoib_dbg_mcast(priv, "%smulticast join failed for %pI6, status %d\n",
> >  						test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags) ? "sendonly " : "",
> >  						mcast->mcmember.mgid.raw, status);

So wasting logcount with expected failures just results in eating
unexpected failures...

> ipoib_dbg_mcast logging is conditionalized on CONFIG_INFINIBAND_IPOIB_DEBUG

Most distros turn this off so the change only impacts people trying to
debug this stuff.

Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 1/2] IB/ipoib: Clean up send-only multicast joins
       [not found] ` <1440200053-18890-1-git-send-email-jgunthorpe-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
  2015-08-21 23:34   ` [PATCH 2/2] IB/ipoib: Suppress warning for send only join failures Jason Gunthorpe
  2015-08-25 12:58   ` [PATCH 1/2] IB/ipoib: Clean up send-only multicast joins Hal Rosenstock
@ 2015-08-25 17:50   ` Doug Ledford
       [not found]     ` <55DCAACD.3000307-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
  2015-09-03 16:50   ` Christoph Lameter
  3 siblings, 1 reply; 22+ messages in thread
From: Doug Ledford @ 2015-08-25 17:50 UTC (permalink / raw)
  To: Jason Gunthorpe, linux-rdma-u79uwXL29TY76Z2rM5mHXA

[-- Attachment #1: Type: text/plain, Size: 5299 bytes --]

On 08/21/2015 07:34 PM, Jason Gunthorpe wrote:
> Even though we don't expect the group to be created by the SM we
> sill need to provide all the parameters to force the SM to validate
> they are correct.

Why does this patch embed locking changes that, as far I can tell, are
not needed by the rest of the patch?  If the locking changes are needed
for some reason, then they likely need to be their own patch with their
own changelog.

> Signed-off-by: Jason Gunthorpe <jgunthorpe-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
> ---
>  drivers/infiniband/ulp/ipoib/ipoib_multicast.c | 47 +++++++++++++++++---------
>  1 file changed, 31 insertions(+), 16 deletions(-)
> 
> diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
> index 0d23e0568deb..c0e702c577d5 100644
> --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
> +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
> @@ -448,8 +448,8 @@ out_locked:
>  	return status;
>  }
>  
> -static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast,
> -			     int create)
> +/* priv->lock must be held when calling */
> +static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast)
>  {
>  	struct ipoib_dev_priv *priv = netdev_priv(dev);
>  	struct ib_sa_multicast *multicast;
> @@ -471,7 +471,14 @@ static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast,
>  		IB_SA_MCMEMBER_REC_PKEY		|
>  		IB_SA_MCMEMBER_REC_JOIN_STATE;
>  
> -	if (create) {
> +	if (mcast != priv->broadcast) {
> +		/*
> +		 * RFC 4391:
> +		 *  The MGID MUST use the same P_Key, Q_Key, SL, MTU,
> +		 *  and HopLimit as those used in the broadcast-GID.  The rest
> +		 *  of attributes SHOULD follow the values used in the
> +		 *  broadcast-GID as well.
> +		 */
>  		comp_mask |=
>  			IB_SA_MCMEMBER_REC_QKEY			|
>  			IB_SA_MCMEMBER_REC_MTU_SELECTOR		|
> @@ -492,19 +499,35 @@ static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast,
>  		rec.sl		  = priv->broadcast->mcmember.sl;
>  		rec.flow_label	  = priv->broadcast->mcmember.flow_label;
>  		rec.hop_limit	  = priv->broadcast->mcmember.hop_limit;
> +
> +		/*
> +		 * Historically Linux IPoIB has never properly supported SEND
> +		 * ONLY join. It emulated it by not providing all the required
> +		 * attributes, which is enough to prevent group creation and
> +		 * detect if there are full members or not. A major problem
> +		 * with supporting SEND ONLY is detecting when the group is
> +		 * auto-destroyed as IPoIB will cache the MLID..
> +		 */
> +#if 1
> +		if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags))
> +			comp_mask &= ~IB_SA_MCMEMBER_REC_TRAFFIC_CLASS;
> +#else
> +		if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags))
> +			rec.join_state = 4;
> +#endif
>  	}
>  
> +	spin_unlock_irq(&priv->lock);
>  	multicast = ib_sa_join_multicast(&ipoib_sa_client, priv->ca, priv->port,
>  					 &rec, comp_mask, GFP_KERNEL,
>  					 ipoib_mcast_join_complete, mcast);
> +	spin_lock_irq(&priv->lock);
>  	if (IS_ERR(multicast)) {
>  		ret = PTR_ERR(multicast);
>  		ipoib_warn(priv, "ib_sa_join_multicast failed, status %d\n", ret);
> -		spin_lock_irq(&priv->lock);
>  		/* Requeue this join task with a backoff delay */
>  		__ipoib_mcast_schedule_join_thread(priv, mcast, 1);
>  		clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
> -		spin_unlock_irq(&priv->lock);
>  		complete(&mcast->done);
>  	}
>  }
> @@ -517,7 +540,6 @@ void ipoib_mcast_join_task(struct work_struct *work)
>  	struct ib_port_attr port_attr;
>  	unsigned long delay_until = 0;
>  	struct ipoib_mcast *mcast = NULL;
> -	int create = 1;
>  
>  	if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags))
>  		return;
> @@ -566,7 +588,6 @@ void ipoib_mcast_join_task(struct work_struct *work)
>  		if (IS_ERR_OR_NULL(priv->broadcast->mc) &&
>  		    !test_bit(IPOIB_MCAST_FLAG_BUSY, &priv->broadcast->flags)) {
>  			mcast = priv->broadcast;
> -			create = 0;
>  			if (mcast->backoff > 1 &&
>  			    time_before(jiffies, mcast->delay_until)) {
>  				delay_until = mcast->delay_until;
> @@ -590,13 +611,7 @@ void ipoib_mcast_join_task(struct work_struct *work)
>  				/* Found the next unjoined group */
>  				init_completion(&mcast->done);
>  				set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
> -				if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags))
> -					create = 0;
> -				else
> -					create = 1;
> -				spin_unlock_irq(&priv->lock);
> -				ipoib_mcast_join(dev, mcast, create);
> -				spin_lock_irq(&priv->lock);
> +				ipoib_mcast_join(dev, mcast);
>  			} else if (!delay_until ||
>  				 time_before(mcast->delay_until, delay_until))
>  				delay_until = mcast->delay_until;
> @@ -616,9 +631,9 @@ out:
>  		init_completion(&mcast->done);
>  		set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
>  	}
> -	spin_unlock_irq(&priv->lock);
>  	if (mcast)
> -		ipoib_mcast_join(dev, mcast, create);
> +		ipoib_mcast_join(dev, mcast);
> +	spin_unlock_irq(&priv->lock);
>  }
>  
>  int ipoib_mcast_start_thread(struct net_device *dev)
> 


-- 
Doug Ledford <dledford-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
              GPG KeyID: 0E572FDD



[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 884 bytes --]

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 1/2] IB/ipoib: Clean up send-only multicast joins
       [not found]     ` <55DCAACD.3000307-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
@ 2015-08-25 18:22       ` Jason Gunthorpe
       [not found]         ` <20150825182233.GA20744-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
  0 siblings, 1 reply; 22+ messages in thread
From: Jason Gunthorpe @ 2015-08-25 18:22 UTC (permalink / raw)
  To: Doug Ledford; +Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA

On Tue, Aug 25, 2015 at 01:50:05PM -0400, Doug Ledford wrote:
> On 08/21/2015 07:34 PM, Jason Gunthorpe wrote:
> > Even though we don't expect the group to be created by the SM we
> > sill need to provide all the parameters to force the SM to validate
> > they are correct.
> 
> Why does this patch embed locking changes that, as far I can tell, are
> not needed by the rest of the patch?

test_bit was lowered into ipoib_mcast_join, which requires pushing the
lock unlock down as well. The set_bit side holds that lock.

> If the locking changes are needed for some reason, then they likely
> need to be their own patch with their own changelog.

It doesn't make sense to move the locking without the code motion that
motivates it, IMHO.

Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 1/2] IB/ipoib: Clean up send-only multicast joins
       [not found]         ` <20150825182233.GA20744-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
@ 2015-08-25 18:35           ` Doug Ledford
       [not found]             ` <55DCB56F.5000001-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
  0 siblings, 1 reply; 22+ messages in thread
From: Doug Ledford @ 2015-08-25 18:35 UTC (permalink / raw)
  To: Jason Gunthorpe; +Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA

[-- Attachment #1: Type: text/plain, Size: 1365 bytes --]

On 08/25/2015 02:22 PM, Jason Gunthorpe wrote:
> On Tue, Aug 25, 2015 at 01:50:05PM -0400, Doug Ledford wrote:
>> On 08/21/2015 07:34 PM, Jason Gunthorpe wrote:
>>> Even though we don't expect the group to be created by the SM we
>>> sill need to provide all the parameters to force the SM to validate
>>> they are correct.
>>
>> Why does this patch embed locking changes that, as far I can tell, are
>> not needed by the rest of the patch?
> 
> test_bit was lowered into ipoib_mcast_join, which requires pushing the
> lock unlock down as well. The set_bit side holds that lock.

I see the confusion.  The test bit of SENDONLY isn't protected by the
lock, just the setting and clearing of BUSY.  There isn't any need to
push the locking down into mcast_join just because we are checking
SENDONLY in mcast_join.

>> If the locking changes are needed for some reason, then they likely
>> need to be their own patch with their own changelog.
> 
> It doesn't make sense to move the locking without the code motion that
> motivates it, IMHO.

Sure, I agree with you on that point.  I thought you were changing the
locking for some other reason that I wasn't groking, I didn't think you
were doing that for the SENDONLY flag test.


-- 
Doug Ledford <dledford-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
              GPG KeyID: 0E572FDD



[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 884 bytes --]

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 1/2] IB/ipoib: Clean up send-only multicast joins
       [not found]             ` <55DCB56F.5000001-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
@ 2015-08-25 19:49               ` Jason Gunthorpe
       [not found]                 ` <20150825194945.GA22335-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
  0 siblings, 1 reply; 22+ messages in thread
From: Jason Gunthorpe @ 2015-08-25 19:49 UTC (permalink / raw)
  To: Doug Ledford; +Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA

On Tue, Aug 25, 2015 at 02:35:27PM -0400, Doug Ledford wrote:
> On 08/25/2015 02:22 PM, Jason Gunthorpe wrote:
> > On Tue, Aug 25, 2015 at 01:50:05PM -0400, Doug Ledford wrote:
> >> On 08/21/2015 07:34 PM, Jason Gunthorpe wrote:
> >>> Even though we don't expect the group to be created by the SM we
> >>> sill need to provide all the parameters to force the SM to validate
> >>> they are correct.
> >>
> >> Why does this patch embed locking changes that, as far I can tell, are
> >> not needed by the rest of the patch?
> > 
> > test_bit was lowered into ipoib_mcast_join, which requires pushing the
> > lock unlock down as well. The set_bit side holds that lock.
> 
> I see the confusion.  The test bit of SENDONLY isn't protected by the
> lock, just the setting and clearing of BUSY.

Well, I don't like to see locking elided unless necessary. The flags
is clearly lock protected data, the lock should be held when accessing
it - even if one can reason some of the locking away today. That just
increases maintainability and clarity.

In this instance pushing the locking is trivial. Do you really want it
gone?

.. and looking at this, I feel justified in this position, because
I noticed a bug in how flags is manipulated.

This:

static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast,
				   struct ib_sa_mcmember_rec *mcmember)
{
[..]
		if (test_and_set_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) {
[..]
			clear_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags);
			return ret;
		}

Has two unlocked sets for flags. The above is called directly from
the sa callback.

While, the clear_bit/etc in ipoib_mcast_restart_task has a lock held
and no obvious exclusion with the above (the mcast is on the
multicast_list by now)..

Sure looks like these two race:

		if (test_and_set_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags))..
		clear_bit(IPOIB_MCAST_FLAG_FOUND, &mcast->flags);

Resulting in corruption of the flags.

This ugly untested thing sort it..

>From ccfc99859d221ea4dada20e388d50e2cc6be580c Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgunthorpe-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
Date: Tue, 25 Aug 2015 13:27:02 -0600
Subject: [PATCH] IB/ipoib: Do not write to mcast->flags without holding a lock

At a minimum the ipoib_mcast_restart_task could be called at any
time and it will also write the flags resulting in corruption
of the flags value.

Signed-off-by: Jason Gunthorpe <jgunthorpe-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
---
 drivers/infiniband/ulp/ipoib/ipoib_multicast.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
index 2d43ec542b63..077586a867bf 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -219,9 +219,9 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast,
 	/* Set the multicast MTU and cached Q_Key before we attach if it's
 	 * the broadcast group.
 	 */
+	spin_lock_irq(&priv->lock);
 	if (!memcmp(mcast->mcmember.mgid.raw, priv->dev->broadcast + 4,
 		    sizeof (union ib_gid))) {
-		spin_lock_irq(&priv->lock);
 		if (!priv->broadcast) {
 			spin_unlock_irq(&priv->lock);
 			return -EAGAIN;
@@ -244,7 +244,6 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast,
 			IPOIB_UD_MTU(ib_mtu_enum_to_int(priv->broadcast->mcmember.mtu));
 
 		priv->qkey = be32_to_cpu(priv->broadcast->mcmember.qkey);
-		spin_unlock_irq(&priv->lock);
 		priv->tx_wr.wr.ud.remote_qkey = priv->qkey;
 		set_qkey = 1;
 	}
@@ -254,19 +253,24 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast,
 			ipoib_warn(priv, "multicast group %pI6 already attached\n",
 				   mcast->mcmember.mgid.raw);
 
+			spin_unlock_irq(&priv->lock);
 			return 0;
 		}
 
+		spin_unlock_irq(&priv->lock);
 		ret = ipoib_mcast_attach(dev, be16_to_cpu(mcast->mcmember.mlid),
 					 &mcast->mcmember.mgid, set_qkey);
 		if (ret < 0) {
 			ipoib_warn(priv, "couldn't attach QP to multicast group %pI6\n",
 				   mcast->mcmember.mgid.raw);
 
+			spin_lock_irq(&priv->lock);
 			clear_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags);
+			spin_unlock_irq(&priv->lock);
 			return ret;
 		}
-	}
+	} else
+		spin_unlock_irq(&priv->lock);
 
 	{
 		struct ib_ah_attr av = {
-- 
2.1.4


--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* Re: [PATCH 2/2] IB/ipoib: Suppress warning for send only join failures
       [not found]             ` <20150825162856.GC4425-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
@ 2015-08-26  9:41               ` Hal Rosenstock
       [not found]                 ` <55DD89B4.7070502-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>
  0 siblings, 1 reply; 22+ messages in thread
From: Hal Rosenstock @ 2015-08-26  9:41 UTC (permalink / raw)
  To: Jason Gunthorpe; +Cc: Doug Ledford, linux-rdma-u79uwXL29TY76Z2rM5mHXA

On 8/25/2015 12:28 PM, Jason Gunthorpe wrote:
> On Tue, Aug 25, 2015 at 08:59:13AM -0400, Hal Rosenstock wrote:
>>> -		if (mcast->logcount++ < 20) {
>>> -			if (status == -ETIMEDOUT || status == -EAGAIN) {
>>> +		bool silent_fail =
>>> +		    test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags) &&
>>> +		    status == -EINVAL;
>>
>> Aren't there other reasons that send only join might have EINVAL
>> indicated ?
> 
> Not sure, the layers below all eat the detailed error code. Hopefully
> EINVAL isn't re-used.

AFAIR there are a number of reasons EINVAL could occur here in which
case this makes this change overly silent. If so, this particular
failure case of send only join failure due to SM rejection (perhaps
ERR_REQ_INVALID SA status only) is best to be made unique and different
from the other current EINVAL failures here.

> 
>> Maybe it's better to be overly silent rather than overly
>> verbose as to not spam the log but it seems like it would make debug of
>> such cases harder.
> 
> It makes debugging harder to have worthless messages because they
> obscure what is going on. The first time I saw this I assumed there
> was an issue, but it turns out to be an expected failure.
> 
> The other issue is the way the rate limiting works:
> 
>>> +		if (mcast->logcount < 20) {
>>> +			if (status == -ETIMEDOUT || status == -EAGAIN ||
>>> +			    silent_fail) {
>>>  				ipoib_dbg_mcast(priv, "%smulticast join failed for %pI6, status %d\n",
>>>  						test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags) ? "sendonly " : "",
>>>  						mcast->mcmember.mgid.raw, status);
> 
> So wasting logcount with expected failures just results in eating
> unexpected failures...

Yes, the problem is distinguishing an "expected" failure from the real
ones and only logging the real ones.

-- Hal
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 1/2] IB/ipoib: Clean up send-only multicast joins
       [not found]                 ` <20150825194945.GA22335-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
@ 2015-08-26 13:37                   ` Doug Ledford
       [not found]                     ` <55DDC12E.6030705-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
  0 siblings, 1 reply; 22+ messages in thread
From: Doug Ledford @ 2015-08-26 13:37 UTC (permalink / raw)
  To: Jason Gunthorpe; +Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA

[-- Attachment #1: Type: text/plain, Size: 8161 bytes --]

On 08/25/2015 03:49 PM, Jason Gunthorpe wrote:
> On Tue, Aug 25, 2015 at 02:35:27PM -0400, Doug Ledford wrote:
>> On 08/25/2015 02:22 PM, Jason Gunthorpe wrote:
>>> On Tue, Aug 25, 2015 at 01:50:05PM -0400, Doug Ledford wrote:
>>>> On 08/21/2015 07:34 PM, Jason Gunthorpe wrote:
>>>>> Even though we don't expect the group to be created by the SM we
>>>>> sill need to provide all the parameters to force the SM to validate
>>>>> they are correct.
>>>>
>>>> Why does this patch embed locking changes that, as far I can tell, are
>>>> not needed by the rest of the patch?
>>>
>>> test_bit was lowered into ipoib_mcast_join, which requires pushing the
>>> lock unlock down as well. The set_bit side holds that lock.
>>
>> I see the confusion.  The test bit of SENDONLY isn't protected by the
>> lock, just the setting and clearing of BUSY.
> 
> Well, I don't like to see locking elided unless necessary.

The corollary to that is that you shouldn't put locking around something
that doesn't need it just to make yourself feel better.

> The flags
> is clearly lock protected data,

Parts of flags are, not all of it.  Locking is needed to provide
exclusion between things that can race.  If two things can not race,
then the locking is not needed.

> the lock should be held when accessing
> it - even if one can reason some of the locking away today. That just
> increases maintainability and clarity.
> 
> In this instance pushing the locking is trivial. Do you really want it
> gone?

I've been considering leaving it, but for other reasons.  However, the
change is not without problems.  I'll get to that later in this email...

> .. and looking at this, I feel justified in this position, because
> I noticed a bug in how flags is manipulated.
> 
> This:
> 
> static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast,
> 				   struct ib_sa_mcmember_rec *mcmember)
> {
> [..]
> 		if (test_and_set_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) {
> [..]
> 			clear_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags);
> 			return ret;
> 		}
> 
> Has two unlocked sets for flags. The above is called directly from
> the sa callback.

Correct.

> While, the clear_bit/etc in ipoib_mcast_restart_task has a lock held
> and no obvious exclusion with the above (the mcast is on the
> multicast_list by now)..
> 
> Sure looks like these two race:
> 
> 		if (test_and_set_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags))..
> 		clear_bit(IPOIB_MCAST_FLAG_FOUND, &mcast->flags);
> 
> Resulting in corruption of the flags.

Incorrect.  On all arches, the set_bit and friends functions are defined
to be atomic.  For arches without a specific asm implementation, that
means a spin lock internal to the bitops header is taken to prevent
corruption of the flags by multiple, racing attempts to access the bits.
 For x86 arch, the asm specific version uses locked memory instructions
to protect the accessing of the bits.  So simply accessing two different
flags at the same time is not a race condition.  You must be attempting
to access the same flag and then do something based up on that access.
The non-atomic nature of test/response is the only thing that needs locking.

In this particular instance, as you noted, ipoib_mcast_join_finish is
called via the sa callback, which will only ever be called if the BUSY
flag is set.  In restart task, we may set/clear the FOUND flag, but
that's only used to remove our mcast entry from the rbtree and move it
to the remove list.  We complete our sort and release our spinlock and
then we wait for all mcast entries with the BUSY flag set to call
complete(mcast->done) before we proceed any further.  Since we will
never call that complete until well after the tests/sets that you list,
and we will never call ipoib_mcast_leave() and therefore not test or
attempt to unattach based on the ATTACHED flag until after all completes
have come through, we are well outside of any race windows.

Which brings me to what I was alluding to earlier.  In your patch, you
move the complete(mcast->done); to inside the spinlock.  I don't
actually like that.  While the flags are atomic and therefore visible
the moment they are done, any other writes to the mcast struct might be
reordered.  By releasing the spinlock before you call complete(), you
force a write memory barrier and make all changes visible.  That way
anything that was waiting on that complete has a guaranteed clean
picture when they get woke up.  Calling complete() within the spinlock
takes that guarantee away.

As to the reason I was considering leaving it?  In the original code,
the normal path has to release/reacquire the spinlock once on success,
and twice on immediate error.  As per some of the talks at LinuxCon last
week, the cost to simply acquire/release a spinlock is in excess of 30
cycles now (mainly due to the locked memory access, which means all
those atomic bit ops are expensive too and yet we pepper them around in
the code like they were candy sprinkles).  Your code does a single
release/reacquire even in the case of immediate error.  However, it has
the issue of calling complete without releasing the spinlock, which I
don't like.  I suppose a wmb(); before the complete() would work though.

> This ugly untested thing sort it..

This patch is unneeded.  I'm guessing you just forgot that the *_bit
primitives in the kernel are defined as atomic operations.

> From ccfc99859d221ea4dada20e388d50e2cc6be580c Mon Sep 17 00:00:00 2001
> From: Jason Gunthorpe <jgunthorpe-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
> Date: Tue, 25 Aug 2015 13:27:02 -0600
> Subject: [PATCH] IB/ipoib: Do not write to mcast->flags without holding a lock
> 
> At a minimum the ipoib_mcast_restart_task could be called at any
> time and it will also write the flags resulting in corruption
> of the flags value.
> 
> Signed-off-by: Jason Gunthorpe <jgunthorpe-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
> ---
>  drivers/infiniband/ulp/ipoib/ipoib_multicast.c | 10 +++++++---
>  1 file changed, 7 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
> index 2d43ec542b63..077586a867bf 100644
> --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
> +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
> @@ -219,9 +219,9 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast,
>  	/* Set the multicast MTU and cached Q_Key before we attach if it's
>  	 * the broadcast group.
>  	 */
> +	spin_lock_irq(&priv->lock);
>  	if (!memcmp(mcast->mcmember.mgid.raw, priv->dev->broadcast + 4,
>  		    sizeof (union ib_gid))) {
> -		spin_lock_irq(&priv->lock);
>  		if (!priv->broadcast) {
>  			spin_unlock_irq(&priv->lock);
>  			return -EAGAIN;
> @@ -244,7 +244,6 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast,
>  			IPOIB_UD_MTU(ib_mtu_enum_to_int(priv->broadcast->mcmember.mtu));
>  
>  		priv->qkey = be32_to_cpu(priv->broadcast->mcmember.qkey);
> -		spin_unlock_irq(&priv->lock);
>  		priv->tx_wr.wr.ud.remote_qkey = priv->qkey;
>  		set_qkey = 1;
>  	}
> @@ -254,19 +253,24 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast,
>  			ipoib_warn(priv, "multicast group %pI6 already attached\n",
>  				   mcast->mcmember.mgid.raw);
>  
> +			spin_unlock_irq(&priv->lock);
>  			return 0;
>  		}
>  
> +		spin_unlock_irq(&priv->lock);
>  		ret = ipoib_mcast_attach(dev, be16_to_cpu(mcast->mcmember.mlid),
>  					 &mcast->mcmember.mgid, set_qkey);
>  		if (ret < 0) {
>  			ipoib_warn(priv, "couldn't attach QP to multicast group %pI6\n",
>  				   mcast->mcmember.mgid.raw);
>  
> +			spin_lock_irq(&priv->lock);
>  			clear_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags);
> +			spin_unlock_irq(&priv->lock);
>  			return ret;
>  		}
> -	}
> +	} else
> +		spin_unlock_irq(&priv->lock);
>  
>  	{
>  		struct ib_ah_attr av = {
> 


-- 
Doug Ledford <dledford-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
              GPG KeyID: 0E572FDD



[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 884 bytes --]

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 1/2] IB/ipoib: Clean up send-only multicast joins
       [not found]                     ` <55DDC12E.6030705-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
@ 2015-08-26 16:18                       ` Jason Gunthorpe
       [not found]                         ` <20150826161829.GA27407-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
  0 siblings, 1 reply; 22+ messages in thread
From: Jason Gunthorpe @ 2015-08-26 16:18 UTC (permalink / raw)
  To: Doug Ledford; +Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA

On Wed, Aug 26, 2015 at 09:37:50AM -0400, Doug Ledford wrote:

> > Sure looks like these two race:
> > 
> > 		if (test_and_set_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags))..
> > 		clear_bit(IPOIB_MCAST_FLAG_FOUND, &mcast->flags);
> > 
> > Resulting in corruption of the flags.
> 
> Incorrect.  On all arches, the set_bit and friends functions are defined
> to be atomic.

Ah, right, I always forget about that because they don't use the
atomic type or naming scheme like everything else :|

> Which brings me to what I was alluding to earlier.  In your patch, you
> move the complete(mcast->done); to inside the spinlock.  I don't
> actually like that.  While the flags are atomic and therefore visible
> the moment they are done, any other writes to the mcast struct might be
> reordered.  By releasing the spinlock before you call complete(), you
> force a write memory barrier and make all changes visible.  That way
> anything that was waiting on that complete has a guaranteed clean
> picture when they get woke up.  Calling complete() within the spinlock
> takes that guarantee away.

complete() is a guarenteed memory barrier.

/**
 * complete: - signals a single thread waiting on this completion
 * @x:  holds the state of this particular completion
 [..]
 * It may be assumed that this function implies a write memory barrier before
 * changing the task state if and only if any tasks are woken up.
 */

Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 1/2] IB/ipoib: Clean up send-only multicast joins
       [not found]                         ` <20150826161829.GA27407-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
@ 2015-08-26 16:43                           ` Doug Ledford
       [not found]                             ` <55DDECA3.4010207-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
  0 siblings, 1 reply; 22+ messages in thread
From: Doug Ledford @ 2015-08-26 16:43 UTC (permalink / raw)
  To: Jason Gunthorpe; +Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA

[-- Attachment #1: Type: text/plain, Size: 2143 bytes --]

On 08/26/2015 12:18 PM, Jason Gunthorpe wrote:
> On Wed, Aug 26, 2015 at 09:37:50AM -0400, Doug Ledford wrote:
> 
>>> Sure looks like these two race:
>>>
>>> 		if (test_and_set_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags))..
>>> 		clear_bit(IPOIB_MCAST_FLAG_FOUND, &mcast->flags);
>>>
>>> Resulting in corruption of the flags.
>>
>> Incorrect.  On all arches, the set_bit and friends functions are defined
>> to be atomic.
> 
> Ah, right, I always forget about that because they don't use the
> atomic type or naming scheme like everything else :|
> 
>> Which brings me to what I was alluding to earlier.  In your patch, you
>> move the complete(mcast->done); to inside the spinlock.  I don't
>> actually like that.  While the flags are atomic and therefore visible
>> the moment they are done, any other writes to the mcast struct might be
>> reordered.  By releasing the spinlock before you call complete(), you
>> force a write memory barrier and make all changes visible.  That way
>> anything that was waiting on that complete has a guaranteed clean
>> picture when they get woke up.  Calling complete() within the spinlock
>> takes that guarantee away.
> 
> complete() is a guarenteed memory barrier.
> 
> /**
>  * complete: - signals a single thread waiting on this completion
>  * @x:  holds the state of this particular completion
>  [..]
>  * It may be assumed that this function implies a write memory barrier before
>  * changing the task state if and only if any tasks are woken up.
>  */

That still takes us back to the fact that the locking changes are
unneeded.  I'm not opposed to them, but as you mentioned in your first
email, they should go with the changes that require them, and none of
the changes in the first patch require them.  Which means that if we
want to keep them, it might be worth splitting them out and giving them
their own patch with an explanation of why they are a benefit (lightly
contended code, saves a release/reacquire on the failure path).


-- 
Doug Ledford <dledford-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
              GPG KeyID: 0E572FDD



[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 884 bytes --]

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 1/2] IB/ipoib: Clean up send-only multicast joins
       [not found]                             ` <55DDECA3.4010207-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
@ 2015-08-27 23:33                               ` Jason Gunthorpe
       [not found]                                 ` <20150827233322.GA29724-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
  0 siblings, 1 reply; 22+ messages in thread
From: Jason Gunthorpe @ 2015-08-27 23:33 UTC (permalink / raw)
  To: Doug Ledford; +Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA

On Wed, Aug 26, 2015 at 12:43:15PM -0400, Doug Ledford wrote:

> That still takes us back to the fact that the locking changes are
> unneeded.  I'm not opposed to them, but as you mentioned in your first
> email, they should go with the changes that require them, and none of
> the changes in the first patch require them.  Which means that if we
> want to keep them, it might be worth splitting them out and giving them
> their own patch with an explanation of why they are a benefit (lightly
> contended code, saves a release/reacquire on the failure path).

Lets just drop them, the cost for restructing was an added empty lock
grab on a non-error path.

Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 2/2] IB/ipoib: Suppress warning for send only join failures
       [not found]                 ` <55DD89B4.7070502-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>
@ 2015-08-27 23:34                   ` Jason Gunthorpe
       [not found]                     ` <20150827233428.GB29724-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
  0 siblings, 1 reply; 22+ messages in thread
From: Jason Gunthorpe @ 2015-08-27 23:34 UTC (permalink / raw)
  To: Hal Rosenstock; +Cc: Doug Ledford, linux-rdma-u79uwXL29TY76Z2rM5mHXA

On Wed, Aug 26, 2015 at 05:41:08AM -0400, Hal Rosenstock wrote:
> On 8/25/2015 12:28 PM, Jason Gunthorpe wrote:
> > On Tue, Aug 25, 2015 at 08:59:13AM -0400, Hal Rosenstock wrote:
> >>> -		if (mcast->logcount++ < 20) {
> >>> -			if (status == -ETIMEDOUT || status == -EAGAIN) {
> >>> +		bool silent_fail =
> >>> +		    test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags) &&
> >>> +		    status == -EINVAL;
> >>
> >> Aren't there other reasons that send only join might have EINVAL
> >> indicated ?
> > 
> > Not sure, the layers below all eat the detailed error code. Hopefully
> > EINVAL isn't re-used.
> 
> AFAIR there are a number of reasons EINVAL could occur here in which
> case this makes this change overly silent. If so, this particular
> failure case of send only join failure due to SM rejection (perhaps
> ERR_REQ_INVALID SA status only) is best to be made unique and different
> from the other current EINVAL failures here.

That is way to much to undertake just to silence this message.

Unless you know the other EINVALs are likely to happen, I'd just
ignore this imperfection.

Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 2/2] IB/ipoib: Suppress warning for send only join failures
       [not found]                     ` <20150827233428.GB29724-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
@ 2015-08-28 13:58                       ` Hal Rosenstock
  0 siblings, 0 replies; 22+ messages in thread
From: Hal Rosenstock @ 2015-08-28 13:58 UTC (permalink / raw)
  To: Jason Gunthorpe; +Cc: Doug Ledford, linux-rdma-u79uwXL29TY76Z2rM5mHXA

On 8/27/2015 7:34 PM, Jason Gunthorpe wrote:
> On Wed, Aug 26, 2015 at 05:41:08AM -0400, Hal Rosenstock wrote:
>> On 8/25/2015 12:28 PM, Jason Gunthorpe wrote:
>>> On Tue, Aug 25, 2015 at 08:59:13AM -0400, Hal Rosenstock wrote:
>>>>> -		if (mcast->logcount++ < 20) {
>>>>> -			if (status == -ETIMEDOUT || status == -EAGAIN) {
>>>>> +		bool silent_fail =
>>>>> +		    test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags) &&
>>>>> +		    status == -EINVAL;
>>>>
>>>> Aren't there other reasons that send only join might have EINVAL
>>>> indicated ?
>>>
>>> Not sure, the layers below all eat the detailed error code. Hopefully
>>> EINVAL isn't re-used.
>>
>> AFAIR there are a number of reasons EINVAL could occur here in which
>> case this makes this change overly silent. If so, this particular
>> failure case of send only join failure due to SM rejection (perhaps
>> ERR_REQ_INVALID SA status only)

I meant ERR_REQ_INSUFFICIENT_COMPONENTS here.

>> is best to be made unique and different
>> from the other current EINVAL failures here.
> 
> That is way to much to undertake just to silence this message.
>
> Unless you know the other EINVALs are likely to happen, I'd just
> ignore this imperfection.

That's probably the only reasonable choice in the short run :-(

-- Hal

> 
> Jason
> 

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 1/2] IB/ipoib: Clean up send-only multicast joins
       [not found] ` <1440200053-18890-1-git-send-email-jgunthorpe-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
                     ` (2 preceding siblings ...)
  2015-08-25 17:50   ` Doug Ledford
@ 2015-09-03 16:50   ` Christoph Lameter
  3 siblings, 0 replies; 22+ messages in thread
From: Christoph Lameter @ 2015-09-03 16:50 UTC (permalink / raw)
  To: Jason Gunthorpe; +Cc: Doug Ledford, linux-rdma-u79uwXL29TY76Z2rM5mHXA

On Fri, 21 Aug 2015, Jason Gunthorpe wrote:

> Even though we don't expect the group to be created by the SM we
> sill need to provide all the parameters to force the SM to validate
> they are correct.

Just ran into this issue with Redhat 7.1. Earlier code base. Same
problem. qkey etc not set. OFED-1.5.4.1 did sets these parameters
correctly and it was a surprise that the kernel IB stack never had these
fixes.

The way sendonly joins are handled now is a bit bothering me now. The join
is delayed? How can that work? We are sending the multicast packets before
the join is complete? This means the multicast packets are going to be
dropped since there is no multicast subscription.

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 1/2] IB/ipoib: Clean up send-only multicast joins
       [not found]                                 ` <20150827233322.GA29724-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
@ 2015-09-03 21:17                                   ` Doug Ledford
       [not found]                                     ` <55E8B8EB.7000009-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
  0 siblings, 1 reply; 22+ messages in thread
From: Doug Ledford @ 2015-09-03 21:17 UTC (permalink / raw)
  To: Jason Gunthorpe; +Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA

[-- Attachment #1: Type: text/plain, Size: 934 bytes --]

On 08/27/2015 07:33 PM, Jason Gunthorpe wrote:
> On Wed, Aug 26, 2015 at 12:43:15PM -0400, Doug Ledford wrote:
> 
>> That still takes us back to the fact that the locking changes are
>> unneeded.  I'm not opposed to them, but as you mentioned in your first
>> email, they should go with the changes that require them, and none of
>> the changes in the first patch require them.  Which means that if we
>> want to keep them, it might be worth splitting them out and giving them
>> their own patch with an explanation of why they are a benefit (lightly
>> contended code, saves a release/reacquire on the failure path).
> 
> Lets just drop them, the cost for restructing was an added empty lock
> grab on a non-error path.

I've reworked the patch to not perform any locking changes and applied
the result.


-- 
Doug Ledford <dledford-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
              GPG KeyID: 0E572FDD



[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 884 bytes --]

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 2/2] IB/ipoib: Suppress warning for send only join failures
       [not found]     ` <1440200053-18890-2-git-send-email-jgunthorpe-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
  2015-08-25 12:59       ` Hal Rosenstock
@ 2015-09-03 21:20       ` Doug Ledford
  1 sibling, 0 replies; 22+ messages in thread
From: Doug Ledford @ 2015-09-03 21:20 UTC (permalink / raw)
  To: Jason Gunthorpe, linux-rdma-u79uwXL29TY76Z2rM5mHXA

[-- Attachment #1: Type: text/plain, Size: 735 bytes --]

On 08/21/2015 07:34 PM, Jason Gunthorpe wrote:
> We expect send only joins to fail, it just means there are no listeners
> for the group. The correct thing to do is silently drop the packet
> at source.
> 
> Eg avahi will full join 224.0.0.251 which causes a send only IGMP packet
> to 224.0.0.22, and then a warning level kmessage like this:
> 
>  ib0: sendonly multicast join failed for ff12:401b:ffff:0000:0000:0000:0000:0016, status -22
> 
> If there is no IP router listening to IGMP.
> 
> Signed-off-by: Jason Gunthorpe <jgunthorpe-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>

Thanks, applied.


-- 
Doug Ledford <dledford-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
              GPG KeyID: 0E572FDD



[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 884 bytes --]

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 1/2] IB/ipoib: Clean up send-only multicast joins
       [not found]                                     ` <55E8B8EB.7000009-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
@ 2015-09-03 21:23                                       ` Jason Gunthorpe
       [not found]                                         ` <20150903212307.GA8026-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
  0 siblings, 1 reply; 22+ messages in thread
From: Jason Gunthorpe @ 2015-09-03 21:23 UTC (permalink / raw)
  To: Doug Ledford; +Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA

On Thu, Sep 03, 2015 at 05:17:31PM -0400, Doug Ledford wrote:
> On 08/27/2015 07:33 PM, Jason Gunthorpe wrote:
> > On Wed, Aug 26, 2015 at 12:43:15PM -0400, Doug Ledford wrote:
> > 
> >> That still takes us back to the fact that the locking changes are
> >> unneeded.  I'm not opposed to them, but as you mentioned in your first
> >> email, they should go with the changes that require them, and none of
> >> the changes in the first patch require them.  Which means that if we
> >> want to keep them, it might be worth splitting them out and giving them
> >> their own patch with an explanation of why they are a benefit (lightly
> >> contended code, saves a release/reacquire on the failure path).
> > 
> > Lets just drop them, the cost for restructing was an added empty lock
> > grab on a non-error path.
> 
> I've reworked the patch to not perform any locking changes and applied
> the result.

Thanks Doug, I've been too busy to look at this..

Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 1/2] IB/ipoib: Clean up send-only multicast joins
       [not found]                                         ` <20150903212307.GA8026-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
@ 2015-09-03 21:25                                           ` Doug Ledford
  0 siblings, 0 replies; 22+ messages in thread
From: Doug Ledford @ 2015-09-03 21:25 UTC (permalink / raw)
  To: Jason Gunthorpe; +Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA

[-- Attachment #1: Type: text/plain, Size: 1157 bytes --]

On 09/03/2015 05:23 PM, Jason Gunthorpe wrote:
> On Thu, Sep 03, 2015 at 05:17:31PM -0400, Doug Ledford wrote:
>> On 08/27/2015 07:33 PM, Jason Gunthorpe wrote:
>>> On Wed, Aug 26, 2015 at 12:43:15PM -0400, Doug Ledford wrote:
>>>
>>>> That still takes us back to the fact that the locking changes are
>>>> unneeded.  I'm not opposed to them, but as you mentioned in your first
>>>> email, they should go with the changes that require them, and none of
>>>> the changes in the first patch require them.  Which means that if we
>>>> want to keep them, it might be worth splitting them out and giving them
>>>> their own patch with an explanation of why they are a benefit (lightly
>>>> contended code, saves a release/reacquire on the failure path).
>>>
>>> Lets just drop them, the cost for restructing was an added empty lock
>>> grab on a non-error path.
>>
>> I've reworked the patch to not perform any locking changes and applied
>> the result.
> 
> Thanks Doug, I've been too busy to look at this..

You're welcome ;-)

-- 
Doug Ledford <dledford-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
              GPG KeyID: 0E572FDD



[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 884 bytes --]

^ permalink raw reply	[flat|nested] 22+ messages in thread

end of thread, other threads:[~2015-09-03 21:25 UTC | newest]

Thread overview: 22+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-08-21 23:34 [PATCH 1/2] IB/ipoib: Clean up send-only multicast joins Jason Gunthorpe
     [not found] ` <1440200053-18890-1-git-send-email-jgunthorpe-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
2015-08-21 23:34   ` [PATCH 2/2] IB/ipoib: Suppress warning for send only join failures Jason Gunthorpe
     [not found]     ` <1440200053-18890-2-git-send-email-jgunthorpe-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
2015-08-25 12:59       ` Hal Rosenstock
     [not found]         ` <55DC66A1.9040207-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>
2015-08-25 16:28           ` Jason Gunthorpe
     [not found]             ` <20150825162856.GC4425-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
2015-08-26  9:41               ` Hal Rosenstock
     [not found]                 ` <55DD89B4.7070502-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>
2015-08-27 23:34                   ` Jason Gunthorpe
     [not found]                     ` <20150827233428.GB29724-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
2015-08-28 13:58                       ` Hal Rosenstock
2015-09-03 21:20       ` Doug Ledford
2015-08-25 12:58   ` [PATCH 1/2] IB/ipoib: Clean up send-only multicast joins Hal Rosenstock
     [not found]     ` <55DC667A.30208-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>
2015-08-25 16:24       ` Jason Gunthorpe
2015-08-25 17:50   ` Doug Ledford
     [not found]     ` <55DCAACD.3000307-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
2015-08-25 18:22       ` Jason Gunthorpe
     [not found]         ` <20150825182233.GA20744-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
2015-08-25 18:35           ` Doug Ledford
     [not found]             ` <55DCB56F.5000001-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
2015-08-25 19:49               ` Jason Gunthorpe
     [not found]                 ` <20150825194945.GA22335-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
2015-08-26 13:37                   ` Doug Ledford
     [not found]                     ` <55DDC12E.6030705-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
2015-08-26 16:18                       ` Jason Gunthorpe
     [not found]                         ` <20150826161829.GA27407-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
2015-08-26 16:43                           ` Doug Ledford
     [not found]                             ` <55DDECA3.4010207-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
2015-08-27 23:33                               ` Jason Gunthorpe
     [not found]                                 ` <20150827233322.GA29724-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
2015-09-03 21:17                                   ` Doug Ledford
     [not found]                                     ` <55E8B8EB.7000009-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
2015-09-03 21:23                                       ` Jason Gunthorpe
     [not found]                                         ` <20150903212307.GA8026-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
2015-09-03 21:25                                           ` Doug Ledford
2015-09-03 16:50   ` Christoph Lameter

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).