All of lore.kernel.org
 help / color / mirror / Atom feed
From: Nikolay Aleksandrov <nikolay@nvidia.com>
To: Jarod Wilson <jarod@redhat.com>, linux-kernel@vger.kernel.org
Cc: Jay Vosburgh <j.vosburgh@gmail.com>,
	Veaceslav Falico <vfalico@gmail.com>,
	Andy Gospodarek <andy@greyhouse.net>,
	"David S. Miller" <davem@davemloft.net>,
	Jakub Kicinski <kuba@kernel.org>, Thomas Davis <tadavis@lbl.gov>,
	netdev@vger.kernel.org
Subject: Re: [PATCH 1/4] bonding: add pure source-mac-based tx hashing option
Date: Wed, 19 May 2021 12:01:15 +0300	[thread overview]
Message-ID: <57bacfa0-2d51-1c37-209f-44a3934a55a4@nvidia.com> (raw)
In-Reply-To: <20210518210849.1673577-2-jarod@redhat.com>

On 19/05/2021 00:08, Jarod Wilson wrote:
> As it turns out, a pure source-mac only tx hash has a place for some VM
> setups. The previously added vlan+srcmac hash doesn't work as well for a
> VM with a single MAC and multiple vlans -- these types of setups path
> traffic more efficiently if the load is split by source mac alone.
> 
> Cc: Jay Vosburgh <j.vosburgh@gmail.com>
> Cc: Veaceslav Falico <vfalico@gmail.com>
> Cc: Andy Gospodarek <andy@greyhouse.net>
> Cc: "David S. Miller" <davem@davemloft.net>
> Cc: Jakub Kicinski <kuba@kernel.org>
> Cc: Thomas Davis <tadavis@lbl.gov>
> Cc: netdev@vger.kernel.org
> Signed-off-by: Jarod Wilson <jarod@redhat.com>
> ---
>  Documentation/networking/bonding.rst | 13 +++++++++++++
>  drivers/net/bonding/bond_main.c      | 26 +++++++++++++++++---------
>  drivers/net/bonding/bond_options.c   |  1 +
>  include/linux/netdevice.h            |  1 +
>  include/uapi/linux/if_bonding.h      |  1 +
>  5 files changed, 33 insertions(+), 9 deletions(-)
> 

Hi,
It would seem you keep adding a new mode for each field. That seems unnecessary to me, and
it also affects the fast path - each new mode you add is one or more additional tests in bond's
fast path. You could instead add just one new mode that has configurable hash fields,
take the "hit" for it once in the fast path (if chosen) and use that.
I'd like to avoid getting another "dstmac" mode or something like that tomorrow.

In fact, both of these new modes are unnecessary in most cases: you could use any available method
(e.g. eBPF) to compute and set the skb queue mapping on Tx to choose any slave, and that would
override any hash or bond mode. See __bond_start_xmit() -> bond_slave_override().

Cheers,
 Nik

> diff --git a/Documentation/networking/bonding.rst b/Documentation/networking/bonding.rst
> index 62f2aab8eaec..66c3fa3a9040 100644
> --- a/Documentation/networking/bonding.rst
> +++ b/Documentation/networking/bonding.rst
> @@ -964,6 +964,19 @@ xmit_hash_policy
>  
>  		hash = (vlan ID) XOR (source MAC vendor) XOR (source MAC dev)
>  
> +	srcmac
> +
> +		This policy uses a very rudimentary source mac hash to
> +		load-balance traffic per-source-mac, with failover should
> +		one leg fail. The intended use case is for a bond shared
> +		by multiple virtual machines, each with their own virtual
> +		mac address, keeping the VMs traffic all limited to the
> +		same outbound interface.
> +
> +		The formula for the hash is simply
> +
> +		hash = (source MAC vendor) XOR (source MAC dev)
> +
>  	The default value is layer2.  This option was added in bonding
>  	version 2.6.3.  In earlier versions of bonding, this parameter
>  	does not exist, and the layer2 policy is the only policy.  The
> diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
> index 20bbda1b36e1..d71e398642fb 100644
> --- a/drivers/net/bonding/bond_main.c
> +++ b/drivers/net/bonding/bond_main.c
> @@ -167,7 +167,8 @@ module_param(xmit_hash_policy, charp, 0);
>  MODULE_PARM_DESC(xmit_hash_policy, "balance-alb, balance-tlb, balance-xor, 802.3ad hashing method; "
>  				   "0 for layer 2 (default), 1 for layer 3+4, "
>  				   "2 for layer 2+3, 3 for encap layer 2+3, "
> -				   "4 for encap layer 3+4, 5 for vlan+srcmac");
> +				   "4 for encap layer 3+4, 5 for vlan+srcmac, "
> +				   "6 for srcmac");
>  module_param(arp_interval, int, 0);
>  MODULE_PARM_DESC(arp_interval, "arp interval in milliseconds");
>  module_param_array(arp_ip_target, charp, NULL, 0);
> @@ -1459,6 +1460,8 @@ static enum netdev_lag_hash bond_lag_hash_type(struct bonding *bond,
>  		return NETDEV_LAG_HASH_E34;
>  	case BOND_XMIT_POLICY_VLAN_SRCMAC:
>  		return NETDEV_LAG_HASH_VLAN_SRCMAC;
> +	case BOND_XMIT_POLICY_SRCMAC:
> +		return NETDEV_LAG_HASH_SRCMAC;
>  	default:
>  		return NETDEV_LAG_HASH_UNKNOWN;
>  	}
> @@ -3521,11 +3524,11 @@ static bool bond_flow_ip(struct sk_buff *skb, struct flow_keys *fk,
>  	return true;
>  }
>  
> -static u32 bond_vlan_srcmac_hash(struct sk_buff *skb)
> +static u32 bond_vlan_srcmac_hash(struct sk_buff *skb, bool with_vlan)
>  {
> -	struct ethhdr *mac_hdr = (struct ethhdr *)skb_mac_header(skb);
> +	struct ethhdr *mac_hdr = eth_hdr(skb);
>  	u32 srcmac_vendor = 0, srcmac_dev = 0;
> -	u16 vlan;
> +	u32 hash;
>  	int i;
>  
>  	for (i = 0; i < 3; i++)
> @@ -3534,12 +3537,14 @@ static u32 bond_vlan_srcmac_hash(struct sk_buff *skb)
>  	for (i = 3; i < ETH_ALEN; i++)
>  		srcmac_dev = (srcmac_dev << 8) | mac_hdr->h_source[i];
>  
> -	if (!skb_vlan_tag_present(skb))
> -		return srcmac_vendor ^ srcmac_dev;
> +	hash = srcmac_vendor ^ srcmac_dev;
> +
> +	if (!with_vlan || !skb_vlan_tag_present(skb))
> +		return hash;
>  
> -	vlan = skb_vlan_tag_get(skb);
> +	hash ^= skb_vlan_tag_get(skb);
>  
> -	return vlan ^ srcmac_vendor ^ srcmac_dev;
> +	return hash;
>  }
>  
>  /* Extract the appropriate headers based on bond's xmit policy */
> @@ -3618,8 +3623,11 @@ u32 bond_xmit_hash(struct bonding *bond, struct sk_buff *skb)
>  	    skb->l4_hash)
>  		return skb->hash;
>  
> +	if (bond->params.xmit_policy == BOND_XMIT_POLICY_SRCMAC)
> +		return bond_vlan_srcmac_hash(skb, false);
> +
>  	if (bond->params.xmit_policy == BOND_XMIT_POLICY_VLAN_SRCMAC)
> -		return bond_vlan_srcmac_hash(skb);
> +		return bond_vlan_srcmac_hash(skb, true);
>  
>  	if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER2 ||
>  	    !bond_flow_dissect(bond, skb, &flow))
> diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c
> index c9d3604ae129..ff68ad2589f0 100644
> --- a/drivers/net/bonding/bond_options.c
> +++ b/drivers/net/bonding/bond_options.c
> @@ -102,6 +102,7 @@ static const struct bond_opt_value bond_xmit_hashtype_tbl[] = {
>  	{ "encap2+3",    BOND_XMIT_POLICY_ENCAP23,     0},
>  	{ "encap3+4",    BOND_XMIT_POLICY_ENCAP34,     0},
>  	{ "vlan+srcmac", BOND_XMIT_POLICY_VLAN_SRCMAC, 0},
> +	{ "srcmac",      BOND_XMIT_POLICY_SRCMAC,      0},
>  	{ NULL,          -1,                           0},
>  };
>  
> diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
> index 5cbc950b34df..d88319fca1d3 100644
> --- a/include/linux/netdevice.h
> +++ b/include/linux/netdevice.h
> @@ -2732,6 +2732,7 @@ enum netdev_lag_hash {
>  	NETDEV_LAG_HASH_E23,
>  	NETDEV_LAG_HASH_E34,
>  	NETDEV_LAG_HASH_VLAN_SRCMAC,
> +	NETDEV_LAG_HASH_SRCMAC,
>  	NETDEV_LAG_HASH_UNKNOWN,
>  };
>  
> diff --git a/include/uapi/linux/if_bonding.h b/include/uapi/linux/if_bonding.h
> index d174914a837d..f3b4d412a73f 100644
> --- a/include/uapi/linux/if_bonding.h
> +++ b/include/uapi/linux/if_bonding.h
> @@ -95,6 +95,7 @@
>  #define BOND_XMIT_POLICY_ENCAP23	3 /* encapsulated layer 2+3 */
>  #define BOND_XMIT_POLICY_ENCAP34	4 /* encapsulated layer 3+4 */
>  #define BOND_XMIT_POLICY_VLAN_SRCMAC	5 /* vlan + source MAC */
> +#define BOND_XMIT_POLICY_SRCMAC		6 /* source MAC only */
>  
>  /* 802.3ad port state definitions (43.4.2.2 in the 802.3ad standard) */
>  #define LACP_STATE_LACP_ACTIVITY   0x1
> 


  reply	other threads:[~2021-05-19  9:01 UTC|newest]

Thread overview: 20+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-05-18 21:08 [PATCH 0/4] bond_alb: support VMs behind bridges better Jarod Wilson
2021-05-18 21:08 ` [PATCH 1/4] bonding: add pure source-mac-based tx hashing option Jarod Wilson
2021-05-19  9:01   ` Nikolay Aleksandrov [this message]
2021-05-18 21:08 ` [PATCH 2/4] bond_alb: don't rewrite bridged non-local MACs Jarod Wilson
2021-05-19 22:31   ` Jay Vosburgh
2021-05-18 21:08 ` [PATCH 3/4] bond_alb: don't tx balance multicast traffic either Jarod Wilson
2021-05-19 18:45   ` Jay Vosburgh
2021-05-18 21:08 ` [PATCH 4/4] bond_alb: put all slaves into promisc Jarod Wilson
2021-05-19 16:47   ` Jay Vosburgh
2021-05-21 13:27 ` [PATCH net-next v2 0/4] bonding/balance-alb: support VMs behind bridges better Jarod Wilson
2021-05-21 13:27   ` [PATCH net-next v2 1/4] bonding: add pure source-mac-based tx hashing option Jarod Wilson
2021-05-21 13:39     ` Nikolay Aleksandrov
2021-05-21 13:41       ` Nikolay Aleksandrov
2021-05-21 18:01       ` Jay Vosburgh
2021-05-21 13:27   ` [PATCH net-next v2 2/4] bonding/balance-lb: don't rewrite bridged non-local MACs Jarod Wilson
2021-05-21 17:58     ` Jay Vosburgh
2021-05-21 13:27   ` [PATCH net-next v2 3/4] bonding/balance-alb: don't tx balance multicast traffic either Jarod Wilson
2021-05-21 17:02     ` Jay Vosburgh
2021-05-21 13:27   ` [PATCH net-next v2 4/4] bonding/balance-alb: put all slaves into promisc Jarod Wilson
2021-05-21 17:01     ` Jay Vosburgh

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=57bacfa0-2d51-1c37-209f-44a3934a55a4@nvidia.com \
    --to=nikolay@nvidia.com \
    --cc=andy@greyhouse.net \
    --cc=davem@davemloft.net \
    --cc=j.vosburgh@gmail.com \
    --cc=jarod@redhat.com \
    --cc=kuba@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=netdev@vger.kernel.org \
    --cc=tadavis@lbl.gov \
    --cc=vfalico@gmail.com \
    --subject='Re: [PATCH 1/4] bonding: add pure source-mac-based tx hashing option' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.