Linux-RDMA Archive on lore.kernel.org
 help / color / Atom feed
* [PATCH rdma-next] IB/cma: Honor traffic class from lower netdevice for RoCE
@ 2019-10-02 12:19 Leon Romanovsky
  2019-10-08 19:10 ` Jason Gunthorpe
  0 siblings, 1 reply; 3+ messages in thread
From: Leon Romanovsky @ 2019-10-02 12:19 UTC (permalink / raw)
  To: Doug Ledford, Jason Gunthorpe
  Cc: Parav Pandit, RDMA mailing list, Leon Romanovsky

From: Parav Pandit <parav@mellanox.com>

When macvlan netdevice is used for RoCE, consider the tos->prio->tc
mapping as SL using its lower netdevice.
1. If lower netdevice is a VLAN netdevice, consider such VLAN netdevice
and its parent netdevice for mapping.
2. If lower netdevice is not a VLAN netdevice, consider tc mapping
directly from such lower netdevice.

Signed-off-by: Parav Pandit <parav@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 drivers/infiniband/core/cma.c | 59 +++++++++++++++++++++++++++++------
 1 file changed, 50 insertions(+), 9 deletions(-)

diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 0e3cf3461999..18b5ad8c7d5f 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -2827,22 +2827,63 @@ static int cma_resolve_iw_route(struct rdma_id_private *id_priv)
 	return 0;
 }
 
-static int iboe_tos_to_sl(struct net_device *ndev, int tos)
+static int get_vlan_ndev_tc(struct net_device *vlan_ndev, int prio)
 {
-	int prio;
 	struct net_device *dev;
 
-	prio = rt_tos2priority(tos);
-	dev = is_vlan_dev(ndev) ? vlan_dev_real_dev(ndev) : ndev;
+	dev = vlan_dev_real_dev(vlan_ndev);
 	if (dev->num_tc)
 		return netdev_get_prio_tc_map(dev, prio);
 
-#if IS_ENABLED(CONFIG_VLAN_8021Q)
+	return (vlan_dev_get_egress_qos_mask(vlan_ndev, prio) &
+		VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
+}
+
+struct iboe_prio_tc_map {
+	int input_prio;
+	int output_tc;
+	bool found;
+};
+
+static int get_lower_vlan_dev_tc(struct net_device *dev, void *data)
+{
+	struct iboe_prio_tc_map *map = data;
+
+	if (is_vlan_dev(dev))
+		map->output_tc = get_vlan_ndev_tc(dev, map->input_prio);
+	else if (dev->num_tc)
+		map->output_tc = netdev_get_prio_tc_map(dev, map->input_prio);
+	else
+		map->output_tc = 0;
+	/* We are interested only in first level VLAN device, so always
+	 * return 1 to stop iterating over next level devices.
+	 */
+	map->found = true;
+	return 1;
+}
+
+static int iboe_tos_to_sl(struct net_device *ndev, int tos)
+{
+	struct iboe_prio_tc_map prio_tc_map = {};
+	int prio = rt_tos2priority(tos);
+
+	/* If VLAN device, get it directly from the VLAN netdev */
 	if (is_vlan_dev(ndev))
-		return (vlan_dev_get_egress_qos_mask(ndev, prio) &
-			VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
-#endif
-	return 0;
+		return get_vlan_ndev_tc(ndev, prio);
+
+	prio_tc_map.input_prio = prio;
+	netdev_walk_all_lower_dev_rcu(ndev,
+				      get_lower_vlan_dev_tc,
+				      &prio_tc_map);
+	/* If map is found from lower device, use it; Otherwise
+	 * continue with the current netdevice to get priority to tc map.
+	 */
+	if (prio_tc_map.found)
+		return prio_tc_map.output_tc;
+	else if (ndev->num_tc)
+		return netdev_get_prio_tc_map(ndev, prio);
+	else
+		return 0;
 }
 
 static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
-- 
2.20.1


^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH rdma-next] IB/cma: Honor traffic class from lower netdevice for RoCE
  2019-10-02 12:19 [PATCH rdma-next] IB/cma: Honor traffic class from lower netdevice for RoCE Leon Romanovsky
@ 2019-10-08 19:10 ` Jason Gunthorpe
  2019-10-08 19:44   ` Parav Pandit
  0 siblings, 1 reply; 3+ messages in thread
From: Jason Gunthorpe @ 2019-10-08 19:10 UTC (permalink / raw)
  To: Leon Romanovsky
  Cc: Doug Ledford, Parav Pandit, RDMA mailing list, Leon Romanovsky


On Wed, Oct 02, 2019 at 03:19:59PM +0300, Leon Romanovsky wrote:
> From: Parav Pandit <parav@mellanox.com>
> 
> When macvlan netdevice is used for RoCE, consider the tos->prio->tc
> mapping as SL using its lower netdevice.
> 1. If lower netdevice is VLAN netdevice, consider such VLAN netdevice
> and it's parent netdevice for mapping
> 2. If lower netdevice is not a VLAN netdevice, consider tc mapping
> directly from such lower netdevice
> 
> Signed-off-by: Parav Pandit <parav@mellanox.com>
> Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
>  drivers/infiniband/core/cma.c | 59 +++++++++++++++++++++++++++++------
>  1 file changed, 50 insertions(+), 9 deletions(-)
> 
> diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
> index 0e3cf3461999..18b5ad8c7d5f 100644
> +++ b/drivers/infiniband/core/cma.c
> @@ -2827,22 +2827,63 @@ static int cma_resolve_iw_route(struct rdma_id_private *id_priv)
>  	return 0;
>  }
>  
> -static int iboe_tos_to_sl(struct net_device *ndev, int tos)
> +static int get_vlan_ndev_tc(struct net_device *vlan_ndev, int prio)
>  {
> -	int prio;
>  	struct net_device *dev;
>  
> -	prio = rt_tos2priority(tos);
> -	dev = is_vlan_dev(ndev) ? vlan_dev_real_dev(ndev) : ndev;
> +	dev = vlan_dev_real_dev(vlan_ndev);
>  	if (dev->num_tc)
>  		return netdev_get_prio_tc_map(dev, prio);
>  
> -#if IS_ENABLED(CONFIG_VLAN_8021Q)
> +	return (vlan_dev_get_egress_qos_mask(vlan_ndev, prio) &
> +		VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
> +}
> +
> +struct iboe_prio_tc_map {
> +	int input_prio;
> +	int output_tc;
> +	bool found;
> +};
> +
> +static int get_lower_vlan_dev_tc(struct net_device *dev, void *data)
> +{
> +	struct iboe_prio_tc_map *map = data;
> +
> +	if (is_vlan_dev(dev))
> +		map->output_tc = get_vlan_ndev_tc(dev, map->input_prio);
> +	else if (dev->num_tc)
> +		map->output_tc = netdev_get_prio_tc_map(dev, map->input_prio);
> +	else
> +		map->output_tc = 0;
> +	/* We are interested only in first level VLAN device, so always
> +	 * return 1 to stop iterating over next level devices.
> +	 */
> +	map->found = true;
> +	return 1;
> +}
> +
> +static int iboe_tos_to_sl(struct net_device *ndev, int tos)
> +{
> +	struct iboe_prio_tc_map prio_tc_map = {};
> +	int prio = rt_tos2priority(tos);
> +
> +	/* If VLAN device, get it directly from the VLAN netdev */
>  	if (is_vlan_dev(ndev))
> -		return (vlan_dev_get_egress_qos_mask(ndev, prio) &
> -			VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
> -#endif
> -	return 0;
> +		return get_vlan_ndev_tc(ndev, prio);
> +
> +	prio_tc_map.input_prio = prio;
> +	netdev_walk_all_lower_dev_rcu(ndev,
> +				      get_lower_vlan_dev_tc,
> +				      &prio_tc_map);

Kinda looks like you have to hold rcu before calling this?

Jason

^ permalink raw reply	[flat|nested] 3+ messages in thread

* RE: [PATCH rdma-next] IB/cma: Honor traffic class from lower netdevice for RoCE
  2019-10-08 19:10 ` Jason Gunthorpe
@ 2019-10-08 19:44   ` Parav Pandit
  0 siblings, 0 replies; 3+ messages in thread
From: Parav Pandit @ 2019-10-08 19:44 UTC (permalink / raw)
  To: Jason Gunthorpe, Leon Romanovsky
  Cc: Doug Ledford, RDMA mailing list, Leon Romanovsky



> -----Original Message-----
> From: Jason Gunthorpe <jgg@ziepe.ca>
> Sent: Tuesday, October 8, 2019 2:10 PM
> To: Leon Romanovsky <leon@kernel.org>
> Cc: Doug Ledford <dledford@redhat.com>; Parav Pandit
> <parav@mellanox.com>; RDMA mailing list <linux-rdma@vger.kernel.org>;
> Leon Romanovsky <leonro@mellanox.com>
> Subject: Re: [PATCH rdma-next] IB/cma: Honor traffic class from lower
> netdevice for RoCE
> 
> 
> On Wed, Oct 02, 2019 at 03:19:59PM +0300, Leon Romanovsky wrote:
> > From: Parav Pandit <parav@mellanox.com>
> >
> > When macvlan netdevice is used for RoCE, consider the tos->prio->tc
> > mapping as SL using its lower netdevice.
> > 1. If lower netdevice is VLAN netdevice, consider such VLAN netdevice
> > and it's parent netdevice for mapping 2. If lower netdevice is not a
> > VLAN netdevice, consider tc mapping directly from such lower netdevice
> >
> > Signed-off-by: Parav Pandit <parav@mellanox.com>
> > Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
> > drivers/infiniband/core/cma.c | 59 +++++++++++++++++++++++++++++------
> >  1 file changed, 50 insertions(+), 9 deletions(-)
> >
> > diff --git a/drivers/infiniband/core/cma.c
> > b/drivers/infiniband/core/cma.c index 0e3cf3461999..18b5ad8c7d5f
> > 100644
> > +++ b/drivers/infiniband/core/cma.c
> > @@ -2827,22 +2827,63 @@ static int cma_resolve_iw_route(struct
> rdma_id_private *id_priv)
> >  	return 0;
> >  }
> >
> > -static int iboe_tos_to_sl(struct net_device *ndev, int tos)
> > +static int get_vlan_ndev_tc(struct net_device *vlan_ndev, int prio)
> >  {
> > -	int prio;
> >  	struct net_device *dev;
> >
> > -	prio = rt_tos2priority(tos);
> > -	dev = is_vlan_dev(ndev) ? vlan_dev_real_dev(ndev) : ndev;
> > +	dev = vlan_dev_real_dev(vlan_ndev);
> >  	if (dev->num_tc)
> >  		return netdev_get_prio_tc_map(dev, prio);
> >
> > -#if IS_ENABLED(CONFIG_VLAN_8021Q)
> > +	return (vlan_dev_get_egress_qos_mask(vlan_ndev, prio) &
> > +		VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT; }
> > +
> > +struct iboe_prio_tc_map {
> > +	int input_prio;
> > +	int output_tc;
> > +	bool found;
> > +};
> > +
> > +static int get_lower_vlan_dev_tc(struct net_device *dev, void *data)
> > +{
> > +	struct iboe_prio_tc_map *map = data;
> > +
> > +	if (is_vlan_dev(dev))
> > +		map->output_tc = get_vlan_ndev_tc(dev, map->input_prio);
> > +	else if (dev->num_tc)
> > +		map->output_tc = netdev_get_prio_tc_map(dev, map-
> >input_prio);
> > +	else
> > +		map->output_tc = 0;
> > +	/* We are interested only in first level VLAN device, so always
> > +	 * return 1 to stop iterating over next level devices.
> > +	 */
> > +	map->found = true;
> > +	return 1;
> > +}
> > +
> > +static int iboe_tos_to_sl(struct net_device *ndev, int tos) {
> > +	struct iboe_prio_tc_map prio_tc_map = {};
> > +	int prio = rt_tos2priority(tos);
> > +
> > +	/* If VLAN device, get it directly from the VLAN netdev */
> >  	if (is_vlan_dev(ndev))
> > -		return (vlan_dev_get_egress_qos_mask(ndev, prio) &
> > -			VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
> > -#endif
> > -	return 0;
> > +		return get_vlan_ndev_tc(ndev, prio);
> > +
> > +	prio_tc_map.input_prio = prio;
> > +	netdev_walk_all_lower_dev_rcu(ndev,
> > +				      get_lower_vlan_dev_tc,
> > +				      &prio_tc_map);
> 
> Kinda looks like you have to hold rcu before calling this?
> 
Oh yes, my bad.
The rcu_read_lock()/rcu_read_unlock() calls are missing around netdev_walk_all_lower_dev_rcu().
Will respin through Leon's tree.

> Jason

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, back to index

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-10-02 12:19 [PATCH rdma-next] IB/cma: Honor traffic class from lower netdevice for RoCE Leon Romanovsky
2019-10-08 19:10 ` Jason Gunthorpe
2019-10-08 19:44   ` Parav Pandit

Linux-RDMA Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/linux-rdma/0 linux-rdma/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 linux-rdma linux-rdma/ https://lore.kernel.org/linux-rdma \
		linux-rdma@vger.kernel.org linux-rdma@archiver.kernel.org
	public-inbox-index linux-rdma

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.linux-rdma


AGPL code for this site: git clone https://public-inbox.org/ public-inbox