linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] drivers/rxe: improve rxe loopback
@ 2017-07-26 14:52 Marcel Apfelbaum
  2017-07-26 19:36 ` Yuval Shaia
                   ` (3 more replies)
  0 siblings, 4 replies; 12+ messages in thread
From: Marcel Apfelbaum @ 2017-07-26 14:52 UTC (permalink / raw)
  To: linux-rdma
  Cc: linux-kernel, monis, dledford, sean.hefty, hal.rosenstock,
	marcel, yuval.shaia

Currently a packet is marked for loopback only if the source and
destination address match. This is not enough when multiple
gids are present in rxe's gid table and the traffic is
from one gid to another.

Fix it by marking the packet for loopback if the destination
address appears in rxe's gid table.

Signed-off-by: Marcel Apfelbaum <marcel@redhat.com>
---
 drivers/infiniband/sw/rxe/rxe_net.c | 47 +++++++++++++++++++++++++++++++++++--
 1 file changed, 45 insertions(+), 2 deletions(-)

diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c
index c3a140e..b76a9a3 100644
--- a/drivers/infiniband/sw/rxe/rxe_net.c
+++ b/drivers/infiniband/sw/rxe/rxe_net.c
@@ -351,6 +351,27 @@ static void prepare_ipv6_hdr(struct dst_entry *dst, struct sk_buff *skb,
 	ip6h->payload_len = htons(skb->len - sizeof(*ip6h));
 }
 
+static inline bool addr4_same_rxe(struct rxe_dev *rxe, struct in_addr *daddr)
+{
+	struct in_device *in_dev;
+	bool same_rxe = false;
+
+	rcu_read_lock();
+	in_dev = __in_dev_get_rcu(rxe->ndev);
+	if (!in_dev)
+		goto out;
+
+	for_ifa(in_dev)
+		if (!memcmp(&ifa->ifa_address, daddr, sizeof(*daddr))) {
+			same_rxe = true;
+			goto out;
+		}
+	endfor_ifa(in_dev);
+out:
+	rcu_read_unlock();
+	return same_rxe;
+}
+
 static int prepare4(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
 		    struct sk_buff *skb, struct rxe_av *av)
 {
@@ -367,7 +388,7 @@ static int prepare4(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
 		return -EHOSTUNREACH;
 	}
 
-	if (!memcmp(saddr, daddr, sizeof(*daddr)))
+	if (addr4_same_rxe(rxe, daddr))
 		pkt->mask |= RXE_LOOPBACK_MASK;
 
 	prepare_udp_hdr(skb, htons(RXE_ROCE_V2_SPORT),
@@ -384,6 +405,28 @@ static int prepare4(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
 	return 0;
 }
 
+static inline bool addr6_same_rxe(struct rxe_dev *rxe, struct in6_addr *daddr)
+{
+	struct inet6_dev *in6_dev;
+	struct inet6_ifaddr *ifp;
+	bool same_rxe = false;
+
+	in6_dev = in6_dev_get(rxe->ndev);
+	if (!in6_dev)
+		return false;
+
+	read_lock_bh(&in6_dev->lock);
+	list_for_each_entry(ifp, &in6_dev->addr_list, if_list)
+		if (!memcmp(&ifp->addr, daddr, sizeof(*daddr))) {
+			same_rxe = true;
+			goto out;
+		}
+out:
+	read_unlock_bh(&in6_dev->lock);
+	in6_dev_put(in6_dev);
+	return same_rxe;
+}
+
 static int prepare6(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
 		    struct sk_buff *skb, struct rxe_av *av)
 {
@@ -398,7 +441,7 @@ static int prepare6(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
 		return -EHOSTUNREACH;
 	}
 
-	if (!memcmp(saddr, daddr, sizeof(*daddr)))
+	if (addr6_same_rxe(rxe, daddr))
 		pkt->mask |= RXE_LOOPBACK_MASK;
 
 	prepare_udp_hdr(skb, htons(RXE_ROCE_V2_SPORT),
-- 
2.9.4

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [PATCH] drivers/rxe: improve rxe loopback
  2017-07-26 14:52 [PATCH] drivers/rxe: improve rxe loopback Marcel Apfelbaum
@ 2017-07-26 19:36 ` Yuval Shaia
  2017-07-26 19:56   ` Yuval Shaia
  2017-07-26 19:57 ` Yuval Shaia
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 12+ messages in thread
From: Yuval Shaia @ 2017-07-26 19:36 UTC (permalink / raw)
  To: Marcel Apfelbaum
  Cc: linux-rdma, linux-kernel, monis, dledford, sean.hefty, hal.rosenstock

On Wed, Jul 26, 2017 at 05:52:48PM +0300, Marcel Apfelbaum wrote:
> Currently a packet is marked for loopback only if the source and
> destination address match. This is not enough when multiple
> gids are present in rxe's gid table and the traffic is
> from one gid to another.
> 
> Fix it by marking the packet for loopback if the destination
> address appears in rxe's gid table.
> 
> Signed-off-by: Marcel Apfelbaum <marcel@redhat.com>
> ---
>  drivers/infiniband/sw/rxe/rxe_net.c | 47 +++++++++++++++++++++++++++++++++++--
>  1 file changed, 45 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c
> index c3a140e..b76a9a3 100644
> --- a/drivers/infiniband/sw/rxe/rxe_net.c
> +++ b/drivers/infiniband/sw/rxe/rxe_net.c
> @@ -351,6 +351,27 @@ static void prepare_ipv6_hdr(struct dst_entry *dst, struct sk_buff *skb,
>  	ip6h->payload_len = htons(skb->len - sizeof(*ip6h));
>  }
>  
> +static inline bool addr4_same_rxe(struct rxe_dev *rxe, struct in_addr *daddr)
> +{
> +	struct in_device *in_dev;
> +	bool same_rxe = false;
> +
> +	rcu_read_lock();
> +	in_dev = __in_dev_get_rcu(rxe->ndev);
> +	if (!in_dev)
> +		goto out;
> +
> +	for_ifa(in_dev)
> +		if (!memcmp(&ifa->ifa_address, daddr, sizeof(*daddr))) {
> +			same_rxe = true;
> +			goto out;
> +		}
> +	endfor_ifa(in_dev);

The above endfor_ifa should move to below.

> +out:
> +	rcu_read_unlock();
> +	return same_rxe;
> +}
> +
>  static int prepare4(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
>  		    struct sk_buff *skb, struct rxe_av *av)
>  {
> @@ -367,7 +388,7 @@ static int prepare4(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
>  		return -EHOSTUNREACH;
>  	}
>  
> -	if (!memcmp(saddr, daddr, sizeof(*daddr)))
> +	if (addr4_same_rxe(rxe, daddr))
>  		pkt->mask |= RXE_LOOPBACK_MASK;
>  
>  	prepare_udp_hdr(skb, htons(RXE_ROCE_V2_SPORT),
> @@ -384,6 +405,28 @@ static int prepare4(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
>  	return 0;
>  }
>  
> +static inline bool addr6_same_rxe(struct rxe_dev *rxe, struct in6_addr *daddr)
> +{
> +	struct inet6_dev *in6_dev;
> +	struct inet6_ifaddr *ifp;
> +	bool same_rxe = false;
> +
> +	in6_dev = in6_dev_get(rxe->ndev);
> +	if (!in6_dev)
> +		return false;
> +
> +	read_lock_bh(&in6_dev->lock);
> +	list_for_each_entry(ifp, &in6_dev->addr_list, if_list)
> +		if (!memcmp(&ifp->addr, daddr, sizeof(*daddr))) {
> +			same_rxe = true;
> +			goto out;
> +		}
> +out:
> +	read_unlock_bh(&in6_dev->lock);
> +	in6_dev_put(in6_dev);
> +	return same_rxe;
> +}
> +
>  static int prepare6(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
>  		    struct sk_buff *skb, struct rxe_av *av)
>  {
> @@ -398,7 +441,7 @@ static int prepare6(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
>  		return -EHOSTUNREACH;
>  	}
>  
> -	if (!memcmp(saddr, daddr, sizeof(*daddr)))
> +	if (addr6_same_rxe(rxe, daddr))
>  		pkt->mask |= RXE_LOOPBACK_MASK;
>  
>  	prepare_udp_hdr(skb, htons(RXE_ROCE_V2_SPORT),
> -- 
> 2.9.4
> 

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] drivers/rxe: improve rxe loopback
  2017-07-26 19:36 ` Yuval Shaia
@ 2017-07-26 19:56   ` Yuval Shaia
  0 siblings, 0 replies; 12+ messages in thread
From: Yuval Shaia @ 2017-07-26 19:56 UTC (permalink / raw)
  To: Marcel Apfelbaum
  Cc: linux-rdma, linux-kernel, monis, dledford, sean.hefty, hal.rosenstock

> > +	endfor_ifa(in_dev);
> 
> The above endfor_ifa should move to below.

Please ignore, my mistake.

> 
> > +out:

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] drivers/rxe: improve rxe loopback
  2017-07-26 14:52 [PATCH] drivers/rxe: improve rxe loopback Marcel Apfelbaum
  2017-07-26 19:36 ` Yuval Shaia
@ 2017-07-26 19:57 ` Yuval Shaia
  2017-07-27  7:04   ` Moni Shoua
  2017-07-27  7:36 ` Leon Romanovsky
  2017-07-27 13:47 ` kbuild test robot
  3 siblings, 1 reply; 12+ messages in thread
From: Yuval Shaia @ 2017-07-26 19:57 UTC (permalink / raw)
  To: Marcel Apfelbaum
  Cc: linux-rdma, linux-kernel, monis, dledford, sean.hefty, hal.rosenstock

On Wed, Jul 26, 2017 at 05:52:48PM +0300, Marcel Apfelbaum wrote:
> Currently a packet is marked for loopback only if the source and
> destination address match. This is not enough when multiple
> gids are present in rxe's gid table and the traffic is
> from one gid to another.
> 
> Fix it by marking the packet for loopback if the destination
> address appears in rxe's gid table.
> 
> Signed-off-by: Marcel Apfelbaum <marcel@redhat.com>

Reviewed-by: Yuval Shaia <yuval.shaia@oracle.com>

Tested-by: Yuval Shaia <yuval.shaia@oracle.com>

> ---
>  drivers/infiniband/sw/rxe/rxe_net.c | 47 +++++++++++++++++++++++++++++++++++--
>  1 file changed, 45 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c
> index c3a140e..b76a9a3 100644
> --- a/drivers/infiniband/sw/rxe/rxe_net.c
> +++ b/drivers/infiniband/sw/rxe/rxe_net.c
> @@ -351,6 +351,27 @@ static void prepare_ipv6_hdr(struct dst_entry *dst, struct sk_buff *skb,
>  	ip6h->payload_len = htons(skb->len - sizeof(*ip6h));
>  }
>  
> +static inline bool addr4_same_rxe(struct rxe_dev *rxe, struct in_addr *daddr)
> +{
> +	struct in_device *in_dev;
> +	bool same_rxe = false;
> +
> +	rcu_read_lock();
> +	in_dev = __in_dev_get_rcu(rxe->ndev);
> +	if (!in_dev)
> +		goto out;
> +
> +	for_ifa(in_dev)
> +		if (!memcmp(&ifa->ifa_address, daddr, sizeof(*daddr))) {
> +			same_rxe = true;
> +			goto out;
> +		}
> +	endfor_ifa(in_dev);
> +out:
> +	rcu_read_unlock();
> +	return same_rxe;
> +}
> +
>  static int prepare4(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
>  		    struct sk_buff *skb, struct rxe_av *av)
>  {
> @@ -367,7 +388,7 @@ static int prepare4(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
>  		return -EHOSTUNREACH;
>  	}
>  
> -	if (!memcmp(saddr, daddr, sizeof(*daddr)))
> +	if (addr4_same_rxe(rxe, daddr))
>  		pkt->mask |= RXE_LOOPBACK_MASK;
>  
>  	prepare_udp_hdr(skb, htons(RXE_ROCE_V2_SPORT),
> @@ -384,6 +405,28 @@ static int prepare4(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
>  	return 0;
>  }
>  
> +static inline bool addr6_same_rxe(struct rxe_dev *rxe, struct in6_addr *daddr)
> +{
> +	struct inet6_dev *in6_dev;
> +	struct inet6_ifaddr *ifp;
> +	bool same_rxe = false;
> +
> +	in6_dev = in6_dev_get(rxe->ndev);
> +	if (!in6_dev)
> +		return false;
> +
> +	read_lock_bh(&in6_dev->lock);
> +	list_for_each_entry(ifp, &in6_dev->addr_list, if_list)
> +		if (!memcmp(&ifp->addr, daddr, sizeof(*daddr))) {
> +			same_rxe = true;
> +			goto out;
> +		}
> +out:
> +	read_unlock_bh(&in6_dev->lock);
> +	in6_dev_put(in6_dev);
> +	return same_rxe;
> +}
> +
>  static int prepare6(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
>  		    struct sk_buff *skb, struct rxe_av *av)
>  {
> @@ -398,7 +441,7 @@ static int prepare6(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
>  		return -EHOSTUNREACH;
>  	}
>  
> -	if (!memcmp(saddr, daddr, sizeof(*daddr)))
> +	if (addr6_same_rxe(rxe, daddr))
>  		pkt->mask |= RXE_LOOPBACK_MASK;
>  
>  	prepare_udp_hdr(skb, htons(RXE_ROCE_V2_SPORT),
> -- 
> 2.9.4
> 

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] drivers/rxe: improve rxe loopback
  2017-07-26 19:57 ` Yuval Shaia
@ 2017-07-27  7:04   ` Moni Shoua
  2017-07-27  9:55     ` Marcel Apfelbaum
  0 siblings, 1 reply; 12+ messages in thread
From: Moni Shoua @ 2017-07-27  7:04 UTC (permalink / raw)
  To: Yuval Shaia
  Cc: Marcel Apfelbaum, linux-rdma, Linux Kernel Mailinglist,
	Doug Ledford, Sean Hefty, Hal Rosenstock

On Wed, Jul 26, 2017 at 10:57 PM, Yuval Shaia <yuval.shaia@oracle.com> wrote:
> On Wed, Jul 26, 2017 at 05:52:48PM +0300, Marcel Apfelbaum wrote:
>> Currently a packet is marked for loopback only if the source and
>> destination address match. This is not enough when multiple
>> gids are present in rxe's gid table and the traffic is
>> from one gid to another.
>>
>> Fix it by marking the packet for loopback if the destination
>> address appears in rxe's gid table.
>>
>> Signed-off-by: Marcel Apfelbaum <marcel@redhat.com>
>
Have you considered using ip_route_output_key() for IPv4 or
ip6_route_output() for IPv6 to decide if  this is a loopback?
For reference you can check the flow starting at  rdma_resolve_ip()

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] drivers/rxe: improve rxe loopback
  2017-07-26 14:52 [PATCH] drivers/rxe: improve rxe loopback Marcel Apfelbaum
  2017-07-26 19:36 ` Yuval Shaia
  2017-07-26 19:57 ` Yuval Shaia
@ 2017-07-27  7:36 ` Leon Romanovsky
  2017-07-27  9:49   ` Marcel Apfelbaum
  2017-07-27 13:47 ` kbuild test robot
  3 siblings, 1 reply; 12+ messages in thread
From: Leon Romanovsky @ 2017-07-27  7:36 UTC (permalink / raw)
  To: Marcel Apfelbaum
  Cc: linux-rdma, linux-kernel, monis, dledford, sean.hefty,
	hal.rosenstock, yuval.shaia

[-- Attachment #1: Type: text/plain, Size: 3397 bytes --]

On Wed, Jul 26, 2017 at 05:52:48PM +0300, Marcel Apfelbaum wrote:
> Currently a packet is marked for loopback only if the source and
> destination address match. This is not enough when multiple
> gids are present in rxe's gid table and the traffic is
> from one gid to another.
>
> Fix it by marking the packet for loopback if the destination
> address appears in rxe's gid table.
>
> Signed-off-by: Marcel Apfelbaum <marcel@redhat.com>
> ---
>  drivers/infiniband/sw/rxe/rxe_net.c | 47 +++++++++++++++++++++++++++++++++++--
>  1 file changed, 45 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c
> index c3a140e..b76a9a3 100644
> --- a/drivers/infiniband/sw/rxe/rxe_net.c
> +++ b/drivers/infiniband/sw/rxe/rxe_net.c
> @@ -351,6 +351,27 @@ static void prepare_ipv6_hdr(struct dst_entry *dst, struct sk_buff *skb,
>  	ip6h->payload_len = htons(skb->len - sizeof(*ip6h));
>  }
>
> +static inline bool addr4_same_rxe(struct rxe_dev *rxe, struct in_addr *daddr)
> +{

In addition to Moni's comment, no "inline" functions in *.c files, please.

> +	struct in_device *in_dev;
> +	bool same_rxe = false;
> +
> +	rcu_read_lock();
> +	in_dev = __in_dev_get_rcu(rxe->ndev);
> +	if (!in_dev)
> +		goto out;
> +
> +	for_ifa(in_dev)
> +		if (!memcmp(&ifa->ifa_address, daddr, sizeof(*daddr))) {
> +			same_rxe = true;
> +			goto out;
> +		}
> +	endfor_ifa(in_dev);

I'm afraid that it will decrease performance drastically. One of the
possible solutions to overcome it, is to check the address of first packet
only, but it will work for RC only.

> +out:
> +	rcu_read_unlock();
> +	return same_rxe;
> +}
> +
>  static int prepare4(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
>  		    struct sk_buff *skb, struct rxe_av *av)
>  {
> @@ -367,7 +388,7 @@ static int prepare4(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
>  		return -EHOSTUNREACH;
>  	}
>
> -	if (!memcmp(saddr, daddr, sizeof(*daddr)))
> +	if (addr4_same_rxe(rxe, daddr))
>  		pkt->mask |= RXE_LOOPBACK_MASK;
>
>  	prepare_udp_hdr(skb, htons(RXE_ROCE_V2_SPORT),
> @@ -384,6 +405,28 @@ static int prepare4(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
>  	return 0;
>  }
>
> +static inline bool addr6_same_rxe(struct rxe_dev *rxe, struct in6_addr *daddr)
> +{

Ditto

> +	struct inet6_dev *in6_dev;
> +	struct inet6_ifaddr *ifp;
> +	bool same_rxe = false;
> +
> +	in6_dev = in6_dev_get(rxe->ndev);
> +	if (!in6_dev)
> +		return false;
> +
> +	read_lock_bh(&in6_dev->lock);
> +	list_for_each_entry(ifp, &in6_dev->addr_list, if_list)
> +		if (!memcmp(&ifp->addr, daddr, sizeof(*daddr))) {
> +			same_rxe = true;
> +			goto out;
> +		}
> +out:
> +	read_unlock_bh(&in6_dev->lock);
> +	in6_dev_put(in6_dev);
> +	return same_rxe;
> +}
> +
>  static int prepare6(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
>  		    struct sk_buff *skb, struct rxe_av *av)
>  {
> @@ -398,7 +441,7 @@ static int prepare6(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
>  		return -EHOSTUNREACH;
>  	}
>
> -	if (!memcmp(saddr, daddr, sizeof(*daddr)))
> +	if (addr6_same_rxe(rxe, daddr))
>  		pkt->mask |= RXE_LOOPBACK_MASK;
>
>  	prepare_udp_hdr(skb, htons(RXE_ROCE_V2_SPORT),
> --
> 2.9.4
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] drivers/rxe: improve rxe loopback
  2017-07-27  7:36 ` Leon Romanovsky
@ 2017-07-27  9:49   ` Marcel Apfelbaum
  2017-07-27 10:40     ` Leon Romanovsky
  0 siblings, 1 reply; 12+ messages in thread
From: Marcel Apfelbaum @ 2017-07-27  9:49 UTC (permalink / raw)
  To: Leon Romanovsky
  Cc: linux-rdma, linux-kernel, monis, dledford, sean.hefty,
	hal.rosenstock, yuval.shaia

On 27/07/2017 10:36, Leon Romanovsky wrote:
> On Wed, Jul 26, 2017 at 05:52:48PM +0300, Marcel Apfelbaum wrote:
>> Currently a packet is marked for loopback only if the source and
>> destination address match. This is not enough when multiple
>> gids are present in rxe's gid table and the traffic is
>> from one gid to another.
>>
>> Fix it by marking the packet for loopback if the destination
>> address appears in rxe's gid table.
>>
>> Signed-off-by: Marcel Apfelbaum <marcel@redhat.com>
>> ---
>>   drivers/infiniband/sw/rxe/rxe_net.c | 47 +++++++++++++++++++++++++++++++++++--
>>   1 file changed, 45 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c
>> index c3a140e..b76a9a3 100644
>> --- a/drivers/infiniband/sw/rxe/rxe_net.c
>> +++ b/drivers/infiniband/sw/rxe/rxe_net.c
>> @@ -351,6 +351,27 @@ static void prepare_ipv6_hdr(struct dst_entry *dst, struct sk_buff *skb,
>>   	ip6h->payload_len = htons(skb->len - sizeof(*ip6h));
>>   }
>>
>> +static inline bool addr4_same_rxe(struct rxe_dev *rxe, struct in_addr *daddr)
>> +{

Hi Leon,
Thanks for the review.

> 
> In addition to Moni's comment, no "inline" functions in *.c files, please.
> 

Sure, I simply followed the function on the same file:
   static inline int addr_same(struct rxe_dev *rxe, struct rxe_av *av)
I even borrowed the name...

>> +	struct in_device *in_dev;
>> +	bool same_rxe = false;
>> +
>> +	rcu_read_lock();
>> +	in_dev = __in_dev_get_rcu(rxe->ndev);
>> +	if (!in_dev)
>> +		goto out;
>> +
>> +	for_ifa(in_dev)
>> +		if (!memcmp(&ifa->ifa_address, daddr, sizeof(*daddr))) {
>> +			same_rxe = true;
>> +			goto out;
>> +		}
>> +	endfor_ifa(in_dev);
> 
> I'm afraid that it will decrease performance drastically. One of the
> possible solutions to overcome it, is to check the address of first packet
> only, but it will work for RC only.
> 

How do you know is "the first" packet?
And yes, for UD the performance would decrease, but only
if the netdev has multiple IPs, right?

I'll ask on Moni's response mail for alternatives.

Thanks,
Marcel

>> +out:
>> +	rcu_read_unlock();
>> +	return same_rxe;
>> +}
>> +
>>   static int prepare4(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
>>   		    struct sk_buff *skb, struct rxe_av *av)
>>   {
>> @@ -367,7 +388,7 @@ static int prepare4(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
>>   		return -EHOSTUNREACH;
>>   	}
>>
>> -	if (!memcmp(saddr, daddr, sizeof(*daddr)))
>> +	if (addr4_same_rxe(rxe, daddr))
>>   		pkt->mask |= RXE_LOOPBACK_MASK;
>>
>>   	prepare_udp_hdr(skb, htons(RXE_ROCE_V2_SPORT),
>> @@ -384,6 +405,28 @@ static int prepare4(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
>>   	return 0;
>>   }
>>
>> +static inline bool addr6_same_rxe(struct rxe_dev *rxe, struct in6_addr *daddr)
>> +{
> 
> Ditto
> 
>> +	struct inet6_dev *in6_dev;
>> +	struct inet6_ifaddr *ifp;
>> +	bool same_rxe = false;
>> +
>> +	in6_dev = in6_dev_get(rxe->ndev);
>> +	if (!in6_dev)
>> +		return false;
>> +
>> +	read_lock_bh(&in6_dev->lock);
>> +	list_for_each_entry(ifp, &in6_dev->addr_list, if_list)
>> +		if (!memcmp(&ifp->addr, daddr, sizeof(*daddr))) {
>> +			same_rxe = true;
>> +			goto out;
>> +		}
>> +out:
>> +	read_unlock_bh(&in6_dev->lock);
>> +	in6_dev_put(in6_dev);
>> +	return same_rxe;
>> +}
>> +
>>   static int prepare6(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
>>   		    struct sk_buff *skb, struct rxe_av *av)
>>   {
>> @@ -398,7 +441,7 @@ static int prepare6(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
>>   		return -EHOSTUNREACH;
>>   	}
>>
>> -	if (!memcmp(saddr, daddr, sizeof(*daddr)))
>> +	if (addr6_same_rxe(rxe, daddr))
>>   		pkt->mask |= RXE_LOOPBACK_MASK;
>>
>>   	prepare_udp_hdr(skb, htons(RXE_ROCE_V2_SPORT),
>> --
>> 2.9.4
>>
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
>> the body of a message to majordomo@vger.kernel.org
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] drivers/rxe: improve rxe loopback
  2017-07-27  7:04   ` Moni Shoua
@ 2017-07-27  9:55     ` Marcel Apfelbaum
  2017-07-30  9:57       ` Moni Shoua
  0 siblings, 1 reply; 12+ messages in thread
From: Marcel Apfelbaum @ 2017-07-27  9:55 UTC (permalink / raw)
  To: Moni Shoua, Yuval Shaia
  Cc: linux-rdma, Linux Kernel Mailinglist, Doug Ledford, Sean Hefty,
	Hal Rosenstock

On 27/07/2017 10:04, Moni Shoua wrote:
> On Wed, Jul 26, 2017 at 10:57 PM, Yuval Shaia <yuval.shaia@oracle.com> wrote:
>> On Wed, Jul 26, 2017 at 05:52:48PM +0300, Marcel Apfelbaum wrote:
>>> Currently a packet is marked for loopback only if the source and
>>> destination address match. This is not enough when multiple
>>> gids are present in rxe's gid table and the traffic is
>>> from one gid to another.
>>>
>>> Fix it by marking the packet for loopback if the destination
>>> address appears in rxe's gid table.
>>>
>>> Signed-off-by: Marcel Apfelbaum <marcel@redhat.com>
>>
> Have you considered using ip_route_output_key() for IPv4 or
> ip6_route_output() for IPv6 to decide if  this is a loopback?
> For reference you can check the flow starting at  rdma_resolve_ip()
> 

Hi Moni,

Yes, I had looked into it, but I haven't seen how I can find
out if the destination IP belongs to the same RXE.
The loopback flag will give us the "same host"
confirmation, but not the same rxe instance, right?

Any ideas would be welcomed.

Thanks,
Marcel

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] drivers/rxe: improve rxe loopback
  2017-07-27  9:49   ` Marcel Apfelbaum
@ 2017-07-27 10:40     ` Leon Romanovsky
  0 siblings, 0 replies; 12+ messages in thread
From: Leon Romanovsky @ 2017-07-27 10:40 UTC (permalink / raw)
  To: Marcel Apfelbaum
  Cc: linux-rdma, linux-kernel, monis, dledford, sean.hefty,
	hal.rosenstock, yuval.shaia

[-- Attachment #1: Type: text/plain, Size: 4492 bytes --]

On Thu, Jul 27, 2017 at 12:49:17PM +0300, Marcel Apfelbaum wrote:
> On 27/07/2017 10:36, Leon Romanovsky wrote:
> > On Wed, Jul 26, 2017 at 05:52:48PM +0300, Marcel Apfelbaum wrote:
> > > Currently a packet is marked for loopback only if the source and
> > > destination address match. This is not enough when multiple
> > > gids are present in rxe's gid table and the traffic is
> > > from one gid to another.
> > >
> > > Fix it by marking the packet for loopback if the destination
> > > address appears in rxe's gid table.
> > >
> > > Signed-off-by: Marcel Apfelbaum <marcel@redhat.com>
> > > ---
> > >   drivers/infiniband/sw/rxe/rxe_net.c | 47 +++++++++++++++++++++++++++++++++++--
> > >   1 file changed, 45 insertions(+), 2 deletions(-)
> > >
> > > diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c
> > > index c3a140e..b76a9a3 100644
> > > --- a/drivers/infiniband/sw/rxe/rxe_net.c
> > > +++ b/drivers/infiniband/sw/rxe/rxe_net.c
> > > @@ -351,6 +351,27 @@ static void prepare_ipv6_hdr(struct dst_entry *dst, struct sk_buff *skb,
> > >   	ip6h->payload_len = htons(skb->len - sizeof(*ip6h));
> > >   }
> > >
> > > +static inline bool addr4_same_rxe(struct rxe_dev *rxe, struct in_addr *daddr)
> > > +{
>
> Hi Leon,
> Thanks for the review.
>
> >
> > In addition to Moni's comment, no "inline" functions in *.c files, please.
> >
>
> Sure, I simply followed the function on the same file:
>   static inline int addr_same(struct rxe_dev *rxe, struct rxe_av *av)
> I even borrowed the name...
>
> > > +	struct in_device *in_dev;
> > > +	bool same_rxe = false;
> > > +
> > > +	rcu_read_lock();
> > > +	in_dev = __in_dev_get_rcu(rxe->ndev);
> > > +	if (!in_dev)
> > > +		goto out;
> > > +
> > > +	for_ifa(in_dev)
> > > +		if (!memcmp(&ifa->ifa_address, daddr, sizeof(*daddr))) {
> > > +			same_rxe = true;
> > > +			goto out;
> > > +		}
> > > +	endfor_ifa(in_dev);
> >
> > I'm afraid that it will decrease performance drastically. One of the
> > possible solutions to overcome it, is to check the address of first packet
> > only, but it will work for RC only.
> >
>
> How do you know is "the first" packet?
> And yes, for UD the performance would decrease, but only
> if the netdev has multiple IPs, right?

Yes, and first lookup for QP RC will be "first packet". QP RC are created with "static" address.

>
> I'll ask on Moni's response mail for alternatives.
>
> Thanks,
> Marcel
>
> > > +out:
> > > +	rcu_read_unlock();
> > > +	return same_rxe;
> > > +}
> > > +
> > >   static int prepare4(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
> > >   		    struct sk_buff *skb, struct rxe_av *av)
> > >   {
> > > @@ -367,7 +388,7 @@ static int prepare4(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
> > >   		return -EHOSTUNREACH;
> > >   	}
> > >
> > > -	if (!memcmp(saddr, daddr, sizeof(*daddr)))
> > > +	if (addr4_same_rxe(rxe, daddr))
> > >   		pkt->mask |= RXE_LOOPBACK_MASK;
> > >
> > >   	prepare_udp_hdr(skb, htons(RXE_ROCE_V2_SPORT),
> > > @@ -384,6 +405,28 @@ static int prepare4(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
> > >   	return 0;
> > >   }
> > >
> > > +static inline bool addr6_same_rxe(struct rxe_dev *rxe, struct in6_addr *daddr)
> > > +{
> >
> > Ditto
> >
> > > +	struct inet6_dev *in6_dev;
> > > +	struct inet6_ifaddr *ifp;
> > > +	bool same_rxe = false;
> > > +
> > > +	in6_dev = in6_dev_get(rxe->ndev);
> > > +	if (!in6_dev)
> > > +		return false;
> > > +
> > > +	read_lock_bh(&in6_dev->lock);
> > > +	list_for_each_entry(ifp, &in6_dev->addr_list, if_list)
> > > +		if (!memcmp(&ifp->addr, daddr, sizeof(*daddr))) {
> > > +			same_rxe = true;
> > > +			goto out;
> > > +		}
> > > +out:
> > > +	read_unlock_bh(&in6_dev->lock);
> > > +	in6_dev_put(in6_dev);
> > > +	return same_rxe;
> > > +}
> > > +
> > >   static int prepare6(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
> > >   		    struct sk_buff *skb, struct rxe_av *av)
> > >   {
> > > @@ -398,7 +441,7 @@ static int prepare6(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
> > >   		return -EHOSTUNREACH;
> > >   	}
> > >
> > > -	if (!memcmp(saddr, daddr, sizeof(*daddr)))
> > > +	if (addr6_same_rxe(rxe, daddr))
> > >   		pkt->mask |= RXE_LOOPBACK_MASK;
> > >
> > >   	prepare_udp_hdr(skb, htons(RXE_ROCE_V2_SPORT),
> > > --
> > > 2.9.4
> > >
> > > --
> > > To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
> > > the body of a message to majordomo@vger.kernel.org
> > > More majordomo info at  http://vger.kernel.org/majordomo-info.html
>

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] drivers/rxe: improve rxe loopback
  2017-07-26 14:52 [PATCH] drivers/rxe: improve rxe loopback Marcel Apfelbaum
                   ` (2 preceding siblings ...)
  2017-07-27  7:36 ` Leon Romanovsky
@ 2017-07-27 13:47 ` kbuild test robot
  3 siblings, 0 replies; 12+ messages in thread
From: kbuild test robot @ 2017-07-27 13:47 UTC (permalink / raw)
  To: Marcel Apfelbaum
  Cc: kbuild-all, linux-rdma, linux-kernel, monis, dledford,
	sean.hefty, hal.rosenstock, marcel, yuval.shaia

[-- Attachment #1: Type: text/plain, Size: 2204 bytes --]

Hi Marcel,

[auto build test ERROR on rdma/master]
[also build test ERROR on v4.13-rc2 next-20170727]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:    https://github.com/0day-ci/linux/commits/Marcel-Apfelbaum/drivers-rxe-improve-rxe-loopback/20170727-211514
base:   https://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma.git master
config: i386-randconfig-x002-07271954 (attached as .config)
compiler: gcc-6 (Debian 6.2.0-3) 6.2.0 20160901
reproduce:
        # save the attached .config to linux build tree
        make ARCH=i386 

All error/warnings (new ones prefixed by >>):

   drivers/infiniband/sw/rxe/rxe_net.c: In function 'addr6_same_rxe':
>> drivers/infiniband/sw/rxe/rxe_net.c:414:12: error: implicit declaration of function 'in6_dev_get' [-Werror=implicit-function-declaration]
     in6_dev = in6_dev_get(rxe->ndev);
               ^~~~~~~~~~~
>> drivers/infiniband/sw/rxe/rxe_net.c:414:10: warning: assignment makes pointer from integer without a cast [-Wint-conversion]
     in6_dev = in6_dev_get(rxe->ndev);
             ^
>> drivers/infiniband/sw/rxe/rxe_net.c:426:2: error: implicit declaration of function 'in6_dev_put' [-Werror=implicit-function-declaration]
     in6_dev_put(in6_dev);
     ^~~~~~~~~~~
   cc1: some warnings being treated as errors

vim +/in6_dev_get +414 drivers/infiniband/sw/rxe/rxe_net.c

   407	
   408	static inline bool addr6_same_rxe(struct rxe_dev *rxe, struct in6_addr *daddr)
   409	{
   410		struct inet6_dev *in6_dev;
   411		struct inet6_ifaddr *ifp;
   412		bool same_rxe = false;
   413	
 > 414		in6_dev = in6_dev_get(rxe->ndev);
   415		if (!in6_dev)
   416			return false;
   417	
   418		read_lock_bh(&in6_dev->lock);
   419		list_for_each_entry(ifp, &in6_dev->addr_list, if_list)
   420			if (!memcmp(&ifp->addr, daddr, sizeof(*daddr))) {
   421				same_rxe = true;
   422				goto out;
   423			}
   424	out:
   425		read_unlock_bh(&in6_dev->lock);
 > 426		in6_dev_put(in6_dev);
   427		return same_rxe;
   428	}
   429	

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 28776 bytes --]

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] drivers/rxe: improve rxe loopback
  2017-07-27  9:55     ` Marcel Apfelbaum
@ 2017-07-30  9:57       ` Moni Shoua
  2017-07-31  9:53         ` Marcel Apfelbaum
  0 siblings, 1 reply; 12+ messages in thread
From: Moni Shoua @ 2017-07-30  9:57 UTC (permalink / raw)
  To: Marcel Apfelbaum
  Cc: Yuval Shaia, linux-rdma, Linux Kernel Mailinglist, Doug Ledford,
	Sean Hefty, Hal Rosenstock

>> Have you considered using ip_route_output_key() for IPv4 or
>> ip6_route_output() for IPv6 to decide if  this is a loopback?
>> For reference you can check the flow starting at  rdma_resolve_ip()
>>
>
> Hi Moni,
>
> Yes, I had looked into it, but I haven't seen how I can find
> out if the destination IP belongs to the same RXE.
> The loopback flag will give us the "same host"
> confirmation, but not the same rxe instance, right?
>
> Any ideas would be welcomed.
>
> Thanks,
> Marcel
>
Hi Marcel

You are right about that. IFF_LOOPBACK tells you that the source and
destination addresses are on the same host but not necessarily on the
same RXE device.

As Leon mentioned, calling addrX_same_rxe() for each packet seems to
heavy , especially when the use case that justifies it (instead of
calling memcmp() on src and dst) is rare. Do you agree?
If so I think that marking a connection as loopback once is the right approach
For RC/UC - when modified to RTR
For UD - this is harder. IsLoopback() is function of the WQE (or at
least the QP and AH together( but not the QP. I think you can add an
improvement that will work for the majority of cases. This is a sketch
of what I have in mind. Let me know what you think please

1. Add bool last_used_qp to AH structure
2. Add bool is_loopback_with_last_qp to AH structure
3. Set values to AH.last_used_qp and AH.is_loopback_with_last_qp in
post_send() modify_ah(),...
4. Mark WQE as loopback depending on the above

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] drivers/rxe: improve rxe loopback
  2017-07-30  9:57       ` Moni Shoua
@ 2017-07-31  9:53         ` Marcel Apfelbaum
  0 siblings, 0 replies; 12+ messages in thread
From: Marcel Apfelbaum @ 2017-07-31  9:53 UTC (permalink / raw)
  To: Moni Shoua
  Cc: Yuval Shaia, linux-rdma, Linux Kernel Mailinglist, Doug Ledford,
	Sean Hefty, Hal Rosenstock

On 30/07/2017 12:57, Moni Shoua wrote:
>>> Have you considered using ip_route_output_key() for IPv4 or
>>> ip6_route_output() for IPv6 to decide if  this is a loopback?
>>> For reference you can check the flow starting at  rdma_resolve_ip()
>>>
>>
>> Hi Moni,
>>
>> Yes, I had looked into it, but I haven't seen how I can find
>> out if the destination IP belongs to the same RXE.
>> The loopback flag will give us the "same host"
>> confirmation, but not the same rxe instance, right?
>>
>> Any ideas would be welcomed.
>>
>> Thanks,
>> Marcel
>>
> Hi Marcel
> 

Hi Moni,

> You are right about that. IFF_LOOPBACK tells you that the source and
> destination addresses are on the same host but not necessarily on the
> same RXE device.
> 
> As Leon mentioned, calling addrX_same_rxe() for each packet seems to
> heavy , especially when the use case that justifies it (instead of
> calling memcmp() on src and dst) is rare. Do you agree?

I do agree is rare, but is depending on use-case. And since it
is a bug we should fix it, but not on the expense of performance
of course.

> If so I think that marking a connection as loopback once is the right approach
> For RC/UC - when modified to RTR

Sounds good to me.

> For UD - this is harder. IsLoopback() is function of the WQE (or at
> least the QP and AH together( but not the QP. I think you can add an
> improvement that will work for the majority of cases. This is a sketch
> of what I have in mind. Let me know what you think please
> 
> 1. Add bool last_used_qp to AH structure
> 2. Add bool is_loopback_with_last_qp to AH structure
> 3. Set values to AH.last_used_qp and AH.is_loopback_with_last_qp in
> post_send() modify_ah(),...
> 4. Mark WQE as loopback depending on the above
> 

Your pointer is very much appreciated, I will look into it.

Thanks,
Marcel

^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2017-07-31  9:53 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-07-26 14:52 [PATCH] drivers/rxe: improve rxe loopback Marcel Apfelbaum
2017-07-26 19:36 ` Yuval Shaia
2017-07-26 19:56   ` Yuval Shaia
2017-07-26 19:57 ` Yuval Shaia
2017-07-27  7:04   ` Moni Shoua
2017-07-27  9:55     ` Marcel Apfelbaum
2017-07-30  9:57       ` Moni Shoua
2017-07-31  9:53         ` Marcel Apfelbaum
2017-07-27  7:36 ` Leon Romanovsky
2017-07-27  9:49   ` Marcel Apfelbaum
2017-07-27 10:40     ` Leon Romanovsky
2017-07-27 13:47 ` kbuild test robot

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).