All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/2] if_link: Add VF multicast promiscuous mode control
@ 2015-01-20 10:50 ` Hiroshi Shimamoto
  0 siblings, 0 replies; 46+ messages in thread
From: Hiroshi Shimamoto @ 2015-01-20 10:50 UTC (permalink / raw)
  To: Alexander Duyck, e1000-devel
  Cc: netdev, Choi, Sy Jong, Hayato Momma, linux-kernel

From: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>

Add netlink directives and ndo entry to control VF multicast promiscuous mode.

Intel ixgbe and ixgbevf driver can handle only 30 multicast MAC addresses
per VF. It means that we cannot assign over 30 IPv6 addresses to a single
VF interface on VM. We want thousands IPv6 addresses in VM.

There is capability of multicast promiscuous mode in Intel 82599 chip.
It enables all multicast packets are delivered to the target VF.

This patch prepares to control that VF multicast promiscuous functionality.

Signed-off-by: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Reviewed-by: Hayato Momma <h-momma@ce.jp.nec.com>
CC: Choi, Sy Jong <sy.jong.choi@intel.com>
---
 include/linux/if_link.h      |  1 +
 include/linux/netdevice.h    |  3 +++
 include/uapi/linux/if_link.h |  6 ++++++
 net/core/rtnetlink.c         | 18 ++++++++++++++++--
 4 files changed, 26 insertions(+), 2 deletions(-)

diff --git a/include/linux/if_link.h b/include/linux/if_link.h
index 119130e..bc29ddf 100644
--- a/include/linux/if_link.h
+++ b/include/linux/if_link.h
@@ -14,5 +14,6 @@ struct ifla_vf_info {
 	__u32 linkstate;
 	__u32 min_tx_rate;
 	__u32 max_tx_rate;
+	__u32 mc_promisc;
 };
 #endif /* _LINUX_IF_LINK_H */
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 52fd8e8..12e88a7 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -868,6 +868,7 @@ typedef u16 (*select_queue_fallback_t)(struct net_device *dev,
  * int (*ndo_set_vf_rate)(struct net_device *dev, int vf, int min_tx_rate,
  *			  int max_tx_rate);
  * int (*ndo_set_vf_spoofchk)(struct net_device *dev, int vf, bool setting);
+ * int (*ndo_set_vf_mc_promisc)(struct net_device *dev, int vf, bool setting);
  * int (*ndo_get_vf_config)(struct net_device *dev,
  *			    int vf, struct ifla_vf_info *ivf);
  * int (*ndo_set_vf_link_state)(struct net_device *dev, int vf, int link_state);
@@ -1084,6 +1085,8 @@ struct net_device_ops {
 						   int max_tx_rate);
 	int			(*ndo_set_vf_spoofchk)(struct net_device *dev,
 						       int vf, bool setting);
+	int			(*ndo_set_vf_mc_promisc)(struct net_device *dev,
+							 int vf, bool setting);
 	int			(*ndo_get_vf_config)(struct net_device *dev,
 						     int vf,
 						     struct ifla_vf_info *ivf);
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index f7d0d2d..32b4b9e 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -454,6 +454,7 @@ enum {
 	IFLA_VF_SPOOFCHK,	/* Spoof Checking on/off switch */
 	IFLA_VF_LINK_STATE,	/* link state enable/disable/auto switch */
 	IFLA_VF_RATE,		/* Min and Max TX Bandwidth Allocation */
+	IFLA_VF_MC_PROMISC,	/* Multicast Promiscuous on/off switch */
 	__IFLA_VF_MAX,
 };
 
@@ -498,6 +499,11 @@ struct ifla_vf_link_state {
 	__u32 link_state;
 };
 
+struct ifla_vf_mc_promisc {
+	__u32 vf;
+	__u32 setting;
+};
+
 /* VF ports management section
  *
  *	Nested layout of set/get msg is:
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 9cf6fe9..5992245 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -807,7 +807,8 @@ static inline int rtnl_vfinfo_size(const struct net_device *dev,
 			 nla_total_size(sizeof(struct ifla_vf_vlan)) +
 			 nla_total_size(sizeof(struct ifla_vf_spoofchk)) +
 			 nla_total_size(sizeof(struct ifla_vf_rate)) +
-			 nla_total_size(sizeof(struct ifla_vf_link_state)));
+			 nla_total_size(sizeof(struct ifla_vf_link_state)) +
+			 nla_total_size(sizeof(struct ifla_vf_mc_promisc)));
 		return size;
 	} else
 		return 0;
@@ -1099,6 +1100,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
 			struct ifla_vf_tx_rate vf_tx_rate;
 			struct ifla_vf_spoofchk vf_spoofchk;
 			struct ifla_vf_link_state vf_linkstate;
+			struct ifla_vf_mc_promisc vf_mc_promisc;
 
 			/*
 			 * Not all SR-IOV capable drivers support the
@@ -1107,6 +1109,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
 			 * report anything.
 			 */
 			ivi.spoofchk = -1;
+			ivi.mc_promisc = -1;
 			memset(ivi.mac, 0, sizeof(ivi.mac));
 			/* The default value for VF link state is "auto"
 			 * IFLA_VF_LINK_STATE_AUTO which equals zero
@@ -1119,7 +1122,8 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
 				vf_rate.vf =
 				vf_tx_rate.vf =
 				vf_spoofchk.vf =
-				vf_linkstate.vf = ivi.vf;
+				vf_linkstate.vf =
+				vf_mc_promisc.vf = ivi.vf;
 
 			memcpy(vf_mac.mac, ivi.mac, sizeof(ivi.mac));
 			vf_vlan.vlan = ivi.vlan;
@@ -1128,6 +1132,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
 			vf_rate.min_tx_rate = ivi.min_tx_rate;
 			vf_rate.max_tx_rate = ivi.max_tx_rate;
 			vf_spoofchk.setting = ivi.spoofchk;
+			vf_mc_promisc.setting = ivi.mc_promisc;
 			vf_linkstate.link_state = ivi.linkstate;
 			vf = nla_nest_start(skb, IFLA_VF_INFO);
 			if (!vf) {
@@ -1461,6 +1466,15 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr *attr)
 								 ivl->link_state);
 			break;
 		}
+		case IFLA_VF_MC_PROMISC: {
+			struct ifla_vf_mc_promisc *ivm;
+			ivm = nla_data(vf);
+			err = -EOPNOTSUPP;
+			if (ops->ndo_set_vf_mc_promisc)
+				err = ops->ndo_set_vf_mc_promisc(dev, ivm->vf,
+								 ivm->setting);
+			break;
+		}
 		default:
 			err = -EINVAL;
 			break;
-- 
1.9.0


^ permalink raw reply related	[flat|nested] 46+ messages in thread

* [PATCH 1/2] if_link: Add VF multicast promiscuous mode control
@ 2015-01-20 10:50 ` Hiroshi Shimamoto
  0 siblings, 0 replies; 46+ messages in thread
From: Hiroshi Shimamoto @ 2015-01-20 10:50 UTC (permalink / raw)
  To: Alexander Duyck, e1000-devel
  Cc: netdev, Choi, Sy Jong, Hayato Momma, linux-kernel

From: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>

Add netlink directives and ndo entry to control VF multicast promiscuous mode.

Intel ixgbe and ixgbevf driver can handle only 30 multicast MAC addresses
per VF. It means that we cannot assign over 30 IPv6 addresses to a single
VF interface on VM. We want thousands IPv6 addresses in VM.

There is capability of multicast promiscuous mode in Intel 82599 chip.
It enables all multicast packets are delivered to the target VF.

This patch prepares to control that VF multicast promiscuous functionality.

Signed-off-by: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Reviewed-by: Hayato Momma <h-momma@ce.jp.nec.com>
CC: Choi, Sy Jong <sy.jong.choi@intel.com>
---
 include/linux/if_link.h      |  1 +
 include/linux/netdevice.h    |  3 +++
 include/uapi/linux/if_link.h |  6 ++++++
 net/core/rtnetlink.c         | 18 ++++++++++++++++--
 4 files changed, 26 insertions(+), 2 deletions(-)

diff --git a/include/linux/if_link.h b/include/linux/if_link.h
index 119130e..bc29ddf 100644
--- a/include/linux/if_link.h
+++ b/include/linux/if_link.h
@@ -14,5 +14,6 @@ struct ifla_vf_info {
 	__u32 linkstate;
 	__u32 min_tx_rate;
 	__u32 max_tx_rate;
+	__u32 mc_promisc;
 };
 #endif /* _LINUX_IF_LINK_H */
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 52fd8e8..12e88a7 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -868,6 +868,7 @@ typedef u16 (*select_queue_fallback_t)(struct net_device *dev,
  * int (*ndo_set_vf_rate)(struct net_device *dev, int vf, int min_tx_rate,
  *			  int max_tx_rate);
  * int (*ndo_set_vf_spoofchk)(struct net_device *dev, int vf, bool setting);
+ * int (*ndo_set_vf_mc_promisc)(struct net_device *dev, int vf, bool setting);
  * int (*ndo_get_vf_config)(struct net_device *dev,
  *			    int vf, struct ifla_vf_info *ivf);
  * int (*ndo_set_vf_link_state)(struct net_device *dev, int vf, int link_state);
@@ -1084,6 +1085,8 @@ struct net_device_ops {
 						   int max_tx_rate);
 	int			(*ndo_set_vf_spoofchk)(struct net_device *dev,
 						       int vf, bool setting);
+	int			(*ndo_set_vf_mc_promisc)(struct net_device *dev,
+							 int vf, bool setting);
 	int			(*ndo_get_vf_config)(struct net_device *dev,
 						     int vf,
 						     struct ifla_vf_info *ivf);
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index f7d0d2d..32b4b9e 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -454,6 +454,7 @@ enum {
 	IFLA_VF_SPOOFCHK,	/* Spoof Checking on/off switch */
 	IFLA_VF_LINK_STATE,	/* link state enable/disable/auto switch */
 	IFLA_VF_RATE,		/* Min and Max TX Bandwidth Allocation */
+	IFLA_VF_MC_PROMISC,	/* Multicast Promiscuous on/off switch */
 	__IFLA_VF_MAX,
 };
 
@@ -498,6 +499,11 @@ struct ifla_vf_link_state {
 	__u32 link_state;
 };
 
+struct ifla_vf_mc_promisc {
+	__u32 vf;
+	__u32 setting;
+};
+
 /* VF ports management section
  *
  *	Nested layout of set/get msg is:
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 9cf6fe9..5992245 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -807,7 +807,8 @@ static inline int rtnl_vfinfo_size(const struct net_device *dev,
 			 nla_total_size(sizeof(struct ifla_vf_vlan)) +
 			 nla_total_size(sizeof(struct ifla_vf_spoofchk)) +
 			 nla_total_size(sizeof(struct ifla_vf_rate)) +
-			 nla_total_size(sizeof(struct ifla_vf_link_state)));
+			 nla_total_size(sizeof(struct ifla_vf_link_state)) +
+			 nla_total_size(sizeof(struct ifla_vf_mc_promisc)));
 		return size;
 	} else
 		return 0;
@@ -1099,6 +1100,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
 			struct ifla_vf_tx_rate vf_tx_rate;
 			struct ifla_vf_spoofchk vf_spoofchk;
 			struct ifla_vf_link_state vf_linkstate;
+			struct ifla_vf_mc_promisc vf_mc_promisc;
 
 			/*
 			 * Not all SR-IOV capable drivers support the
@@ -1107,6 +1109,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
 			 * report anything.
 			 */
 			ivi.spoofchk = -1;
+			ivi.mc_promisc = -1;
 			memset(ivi.mac, 0, sizeof(ivi.mac));
 			/* The default value for VF link state is "auto"
 			 * IFLA_VF_LINK_STATE_AUTO which equals zero
@@ -1119,7 +1122,8 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
 				vf_rate.vf =
 				vf_tx_rate.vf =
 				vf_spoofchk.vf =
-				vf_linkstate.vf = ivi.vf;
+				vf_linkstate.vf =
+				vf_mc_promisc.vf = ivi.vf;
 
 			memcpy(vf_mac.mac, ivi.mac, sizeof(ivi.mac));
 			vf_vlan.vlan = ivi.vlan;
@@ -1128,6 +1132,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
 			vf_rate.min_tx_rate = ivi.min_tx_rate;
 			vf_rate.max_tx_rate = ivi.max_tx_rate;
 			vf_spoofchk.setting = ivi.spoofchk;
+			vf_mc_promisc.setting = ivi.mc_promisc;
 			vf_linkstate.link_state = ivi.linkstate;
 			vf = nla_nest_start(skb, IFLA_VF_INFO);
 			if (!vf) {
@@ -1461,6 +1466,15 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr *attr)
 								 ivl->link_state);
 			break;
 		}
+		case IFLA_VF_MC_PROMISC: {
+			struct ifla_vf_mc_promisc *ivm;
+			ivm = nla_data(vf);
+			err = -EOPNOTSUPP;
+			if (ops->ndo_set_vf_mc_promisc)
+				err = ops->ndo_set_vf_mc_promisc(dev, ivm->vf,
+								 ivm->setting);
+			break;
+		}
 		default:
 			err = -EINVAL;
 			break;
-- 
1.9.0


------------------------------------------------------------------------------
New Year. New Location. New Benefits. New Data Center in Ashburn, VA.
GigeNET is offering a free month of service with a new server in Ashburn.
Choose from 2 high performing configs, both with 100TB of bandwidth.
Higher redundancy.Lower latency.Increased capacity.Completely compliant.
http://p.sf.net/sfu/gigenet
_______________________________________________
E1000-devel mailing list
E1000-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/e1000-devel
To learn more about Intel&#174; Ethernet, visit http://communities.intel.com/community/wired

^ permalink raw reply related	[flat|nested] 46+ messages in thread

* Re: [PATCH 1/2] if_link: Add VF multicast promiscuous mode control
  2015-01-20 10:50 ` Hiroshi Shimamoto
@ 2015-01-20 11:42   ` Bjørn Mork
  -1 siblings, 0 replies; 46+ messages in thread
From: Bjørn Mork @ 2015-01-20 11:42 UTC (permalink / raw)
  To: Hiroshi Shimamoto
  Cc: Alexander Duyck, e1000-devel, netdev, Choi, Sy Jong,
	Hayato Momma, linux-kernel

Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com> writes:

> From: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
>
> Add netlink directives and ndo entry to control VF multicast promiscuous mode.
>
> Intel ixgbe and ixgbevf driver can handle only 30 multicast MAC addresses
> per VF. It means that we cannot assign over 30 IPv6 addresses to a single
> VF interface on VM. We want thousands IPv6 addresses in VM.
>
> There is capability of multicast promiscuous mode in Intel 82599 chip.
> It enables all multicast packets are delivered to the target VF.
>
> This patch prepares to control that VF multicast promiscuous functionality.

Adding a new hook for this seems over-complicated to me.  And it still
doesn't solve the real problems that
 a) the user has to know about this limit, and
 b) manually configure the feature

Most of us, lacking the ability to imagine such arbitrary hardware
limitations, will go through a few hours of frustrating debugging before
we figure this one out...

Why can't the ixgbevf driver just automatically signal the ixgbe driver
to enable multicast promiscuous mode whenever the list grows past the
limit?

I'd also like to note that this comment in
drivers/net/ethernet/intel/ixgbevf/vf.c
indicates that the author had some ideas about how more than 30
addresses could/should be handled:

static s32 ixgbevf_update_mc_addr_list_vf(struct ixgbe_hw *hw,
					  struct net_device *netdev)
{
	struct netdev_hw_addr *ha;
	u32 msgbuf[IXGBE_VFMAILBOX_SIZE];
	u16 *vector_list = (u16 *)&msgbuf[1];
	u32 cnt, i;

	/* Each entry in the list uses 1 16 bit word.  We have 30
	 * 16 bit words available in our HW msg buffer (minus 1 for the
	 * msg type).  That's 30 hash values if we pack 'em right.  If
	 * there are more than 30 MC addresses to add then punt the
	 * extras for now and then add code to handle more than 30 later.
	 * It would be unusual for a server to request that many multi-cast
	 * addresses except for in large enterprise network environments.
	 */



The last 2 lines of that comment are of course totally bogus and
pointless and should be deleted in any case...  It's obvious that 30
multicast addresses is ridiculously low for lots of normal use cases.


Bjørn

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH 1/2] if_link: Add VF multicast promiscuous mode control
@ 2015-01-20 11:42   ` Bjørn Mork
  0 siblings, 0 replies; 46+ messages in thread
From: Bjørn Mork @ 2015-01-20 11:42 UTC (permalink / raw)
  To: Hiroshi Shimamoto
  Cc: Alexander Duyck, e1000-devel, netdev, Choi, Sy Jong,
	Hayato Momma, linux-kernel

Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com> writes:

> From: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
>
> Add netlink directives and ndo entry to control VF multicast promiscuous mode.
>
> Intel ixgbe and ixgbevf driver can handle only 30 multicast MAC addresses
> per VF. It means that we cannot assign over 30 IPv6 addresses to a single
> VF interface on VM. We want thousands IPv6 addresses in VM.
>
> There is capability of multicast promiscuous mode in Intel 82599 chip.
> It enables all multicast packets are delivered to the target VF.
>
> This patch prepares to control that VF multicast promiscuous functionality.

Adding a new hook for this seems over-complicated to me.  And it still
doesn't solve the real problems that
 a) the user has to know about this limit, and
 b) manually configure the feature

Most of us, lacking the ability to imagine such arbitrary hardware
limitations, will go through a few hours of frustrating debugging before
we figure this one out...

Why can't the ixgbevf driver just automatically signal the ixgbe driver
to enable multicast promiscuous mode whenever the list grows past the
limit?

I'd also like to note that this comment in
drivers/net/ethernet/intel/ixgbevf/vf.c
indicates that the author had some ideas about how more than 30
addresses could/should be handled:

static s32 ixgbevf_update_mc_addr_list_vf(struct ixgbe_hw *hw,
					  struct net_device *netdev)
{
	struct netdev_hw_addr *ha;
	u32 msgbuf[IXGBE_VFMAILBOX_SIZE];
	u16 *vector_list = (u16 *)&msgbuf[1];
	u32 cnt, i;

	/* Each entry in the list uses 1 16 bit word.  We have 30
	 * 16 bit words available in our HW msg buffer (minus 1 for the
	 * msg type).  That's 30 hash values if we pack 'em right.  If
	 * there are more than 30 MC addresses to add then punt the
	 * extras for now and then add code to handle more than 30 later.
	 * It would be unusual for a server to request that many multi-cast
	 * addresses except for in large enterprise network environments.
	 */



The last 2 lines of that comment are of course totally bogus and
pointless and should be deleted in any case...  It's obvious that 30
multicast addresses is ridiculously low for lots of normal use cases.


Bjørn

^ permalink raw reply	[flat|nested] 46+ messages in thread

* RE: [PATCH 1/2] if_link: Add VF multicast promiscuous mode control
  2015-01-20 10:50 ` Hiroshi Shimamoto
@ 2015-01-20 11:53   ` David Laight
  -1 siblings, 0 replies; 46+ messages in thread
From: David Laight @ 2015-01-20 11:53 UTC (permalink / raw)
  To: 'Hiroshi Shimamoto', Alexander Duyck, e1000-devel
  Cc: netdev, Choi, Sy Jong, Hayato Momma, linux-kernel

From: Hiroshi Shimamoto
> From: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
> 
> Add netlink directives and ndo entry to control VF multicast promiscuous mode.
> 
> Intel ixgbe and ixgbevf driver can handle only 30 multicast MAC addresses
> per VF. It means that we cannot assign over 30 IPv6 addresses to a single
> VF interface on VM. We want thousands IPv6 addresses in VM.
> 
> There is capability of multicast promiscuous mode in Intel 82599 chip.
> It enables all multicast packets are delivered to the target VF.

A lot of ethernet chips (certainly older ones) used a hash for the
multicast filter. So the hardware filter was never perfect.

I even know of one that needed to be told the addresses and used
those to set the hash bits.
For that device I generated a canonical set of MAC addresses (one
for each multicast hash) and did a software count of how many times
each one was needed for each of the enabled multicasts.

My guess is that 'multicast promiscuous mode' is always valid
and should be enabled by the relevant low level driver when
any table setup fails because there are too many entries.

I actually suspect that most systems want all the multicasts
that occur at any moderate rate on the local network segment.

	David


^ permalink raw reply	[flat|nested] 46+ messages in thread

* RE: [PATCH 1/2] if_link: Add VF multicast promiscuous mode control
@ 2015-01-20 11:53   ` David Laight
  0 siblings, 0 replies; 46+ messages in thread
From: David Laight @ 2015-01-20 11:53 UTC (permalink / raw)
  To: 'Hiroshi Shimamoto', Alexander Duyck, e1000-devel
  Cc: netdev, Choi, Sy Jong, Hayato Momma, linux-kernel

From: Hiroshi Shimamoto
> From: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
> 
> Add netlink directives and ndo entry to control VF multicast promiscuous mode.
> 
> Intel ixgbe and ixgbevf driver can handle only 30 multicast MAC addresses
> per VF. It means that we cannot assign over 30 IPv6 addresses to a single
> VF interface on VM. We want thousands IPv6 addresses in VM.
> 
> There is capability of multicast promiscuous mode in Intel 82599 chip.
> It enables all multicast packets are delivered to the target VF.

A lot of ethernet chips (certainly older ones) used a hash for the
multicast filter. So the hardware filter was never perfect.

I even know of one that needed to be told the addresses and used
those to set the hash bits.
For that device I generated a canonical set of MAC addresses (one
for each multicast hash) and did a software count of how many times
each one was needed for each of the enabled multicasts.

My guess is that 'multicast promiscuous mode' is always valid
and should be enabled by the relevant low level driver when
any table setup fails because there are too many entries.

I actually suspect that most systems want all the multicasts
that occur at any moderate rate on the local network segment.

	David

^ permalink raw reply	[flat|nested] 46+ messages in thread

* RE: [PATCH 1/2] if_link: Add VF multicast promiscuous mode control
  2015-01-20 11:42   ` Bjørn Mork
@ 2015-01-20 23:40     ` Hiroshi Shimamoto
  -1 siblings, 0 replies; 46+ messages in thread
From: Hiroshi Shimamoto @ 2015-01-20 23:40 UTC (permalink / raw)
  To: Bjørn Mork
  Cc: Alexander Duyck, e1000-devel, netdev, Choi, Sy Jong,
	Hayato Momma, linux-kernel

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: Type: text/plain; charset="utf-8", Size: 3077 bytes --]

> Subject: Re: [PATCH 1/2] if_link: Add VF multicast promiscuous mode control
> 
> Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com> writes:
> 
> > From: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
> >
> > Add netlink directives and ndo entry to control VF multicast promiscuous mode.
> >
> > Intel ixgbe and ixgbevf driver can handle only 30 multicast MAC addresses
> > per VF. It means that we cannot assign over 30 IPv6 addresses to a single
> > VF interface on VM. We want thousands IPv6 addresses in VM.
> >
> > There is capability of multicast promiscuous mode in Intel 82599 chip.
> > It enables all multicast packets are delivered to the target VF.
> >
> > This patch prepares to control that VF multicast promiscuous functionality.
> 
> Adding a new hook for this seems over-complicated to me.  And it still
> doesn't solve the real problems that
>  a) the user has to know about this limit, and
>  b) manually configure the feature
> 
> Most of us, lacking the ability to imagine such arbitrary hardware
> limitations, will go through a few hours of frustrating debugging before
> we figure this one out...
> 
> Why can't the ixgbevf driver just automatically signal the ixgbe driver
> to enable multicast promiscuous mode whenever the list grows past the
> limit?

I had submitted a patch to change ixgbe and ixgbevf driver for this issue.
https://lkml.org/lkml/2014/11/27/269

The previous patch introduces API between ixgbe and ixgbevf driver to
enable multicast promiscuous mode, and ixgbevf enables it automatically
if the number of addresses is over than 30.

I got some comment and I would like to clarify the point, but there was no
answer.
That's the reason I submitted this patch.

Do you think a patch for the ixgbe/ixgbevf driver is preferred?


thanks,
Hiroshi

> 
> I'd also like to note that this comment in
> drivers/net/ethernet/intel/ixgbevf/vf.c
> indicates that the author had some ideas about how more than 30
> addresses could/should be handled:
> 
> static s32 ixgbevf_update_mc_addr_list_vf(struct ixgbe_hw *hw,
> 					  struct net_device *netdev)
> {
> 	struct netdev_hw_addr *ha;
> 	u32 msgbuf[IXGBE_VFMAILBOX_SIZE];
> 	u16 *vector_list = (u16 *)&msgbuf[1];
> 	u32 cnt, i;
> 
> 	/* Each entry in the list uses 1 16 bit word.  We have 30
> 	 * 16 bit words available in our HW msg buffer (minus 1 for the
> 	 * msg type).  That's 30 hash values if we pack 'em right.  If
> 	 * there are more than 30 MC addresses to add then punt the
> 	 * extras for now and then add code to handle more than 30 later.
> 	 * It would be unusual for a server to request that many multi-cast
> 	 * addresses except for in large enterprise network environments.
> 	 */
> 
> 
> 
> The last 2 lines of that comment are of course totally bogus and
> pointless and should be deleted in any case...  It's obvious that 30
> multicast addresses is ridiculously low for lots of normal use cases.
> 
> 
> Bjørn
ÿôèº{.nÇ+‰·Ÿ®‰­†+%ŠËÿ±éݶ\x17¥Šwÿº{.nÇ+‰·¥Š{±þG«éÿŠ{ayº\x1dʇڙë,j\a­¢f£¢·hšïêÿ‘êçz_è®\x03(­éšŽŠÝ¢j"ú\x1a¶^[m§ÿÿ¾\a«þG«éÿ¢¸?™¨è­Ú&£ø§~á¶iO•æ¬z·švØ^\x14\x04\x1a¶^[m§ÿÿÃ\fÿ¶ìÿ¢¸?–I¥

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH 1/2] if_link: Add VF multicast promiscuous mode control
@ 2015-01-20 23:40     ` Hiroshi Shimamoto
  0 siblings, 0 replies; 46+ messages in thread
From: Hiroshi Shimamoto @ 2015-01-20 23:40 UTC (permalink / raw)
  To: Bjørn Mork
  Cc: e1000-devel, netdev, Choi, Sy Jong, linux-kernel, Hayato Momma

> Subject: Re: [PATCH 1/2] if_link: Add VF multicast promiscuous mode control
> 
> Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com> writes:
> 
> > From: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
> >
> > Add netlink directives and ndo entry to control VF multicast promiscuous mode.
> >
> > Intel ixgbe and ixgbevf driver can handle only 30 multicast MAC addresses
> > per VF. It means that we cannot assign over 30 IPv6 addresses to a single
> > VF interface on VM. We want thousands IPv6 addresses in VM.
> >
> > There is capability of multicast promiscuous mode in Intel 82599 chip.
> > It enables all multicast packets are delivered to the target VF.
> >
> > This patch prepares to control that VF multicast promiscuous functionality.
> 
> Adding a new hook for this seems over-complicated to me.  And it still
> doesn't solve the real problems that
>  a) the user has to know about this limit, and
>  b) manually configure the feature
> 
> Most of us, lacking the ability to imagine such arbitrary hardware
> limitations, will go through a few hours of frustrating debugging before
> we figure this one out...
> 
> Why can't the ixgbevf driver just automatically signal the ixgbe driver
> to enable multicast promiscuous mode whenever the list grows past the
> limit?

I had submitted a patch to change ixgbe and ixgbevf driver for this issue.
https://lkml.org/lkml/2014/11/27/269

The previous patch introduces API between ixgbe and ixgbevf driver to
enable multicast promiscuous mode, and ixgbevf enables it automatically
if the number of addresses is over than 30.

I got some comment and I would like to clarify the point, but there was no
answer.
That's the reason I submitted this patch.

Do you think a patch for the ixgbe/ixgbevf driver is preferred?


thanks,
Hiroshi

> 
> I'd also like to note that this comment in
> drivers/net/ethernet/intel/ixgbevf/vf.c
> indicates that the author had some ideas about how more than 30
> addresses could/should be handled:
> 
> static s32 ixgbevf_update_mc_addr_list_vf(struct ixgbe_hw *hw,
> 					  struct net_device *netdev)
> {
> 	struct netdev_hw_addr *ha;
> 	u32 msgbuf[IXGBE_VFMAILBOX_SIZE];
> 	u16 *vector_list = (u16 *)&msgbuf[1];
> 	u32 cnt, i;
> 
> 	/* Each entry in the list uses 1 16 bit word.  We have 30
> 	 * 16 bit words available in our HW msg buffer (minus 1 for the
> 	 * msg type).  That's 30 hash values if we pack 'em right.  If
> 	 * there are more than 30 MC addresses to add then punt the
> 	 * extras for now and then add code to handle more than 30 later.
> 	 * It would be unusual for a server to request that many multi-cast
> 	 * addresses except for in large enterprise network environments.
> 	 */
> 
> 
> 
> The last 2 lines of that comment are of course totally bogus and
> pointless and should be deleted in any case...  It's obvious that 30
> multicast addresses is ridiculously low for lots of normal use cases.
> 
> 
> Bjørn
------------------------------------------------------------------------------
New Year. New Location. New Benefits. New Data Center in Ashburn, VA.
GigeNET is offering a free month of service with a new server in Ashburn.
Choose from 2 high performing configs, both with 100TB of bandwidth.
Higher redundancy.Lower latency.Increased capacity.Completely compliant.
http://p.sf.net/sfu/gigenet
_______________________________________________
E1000-devel mailing list
E1000-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/e1000-devel
To learn more about Intel&#174; Ethernet, visit http://communities.intel.com/community/wired

^ permalink raw reply	[flat|nested] 46+ messages in thread

* RE: [E1000-devel] [PATCH 1/2] if_link: Add VF multicast promiscuous mode control
  2015-01-20 23:40     ` Hiroshi Shimamoto
@ 2015-01-21  0:26       ` Skidmore, Donald C
  -1 siblings, 0 replies; 46+ messages in thread
From: Skidmore, Donald C @ 2015-01-21  0:26 UTC (permalink / raw)
  To: Hiroshi Shimamoto, Bjørn Mork
  Cc: e1000-devel, netdev, Choi, Sy Jong, linux-kernel, Hayato Momma

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: Type: text/plain; charset="utf-8", Size: 4171 bytes --]



> -----Original Message-----
> From: Hiroshi Shimamoto [mailto:h-shimamoto@ct.jp.nec.com]
> Sent: Tuesday, January 20, 2015 3:40 PM
> To: Bjørn Mork
> Cc: e1000-devel@lists.sourceforge.net; netdev@vger.kernel.org; Choi, Sy
> Jong; linux-kernel@vger.kernel.org; Hayato Momma
> Subject: Re: [E1000-devel] [PATCH 1/2] if_link: Add VF multicast promiscuous
> mode control
> 
> > Subject: Re: [PATCH 1/2] if_link: Add VF multicast promiscuous mode
> > control
> >
> > Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com> writes:
> >
> > > From: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
> > >
> > > Add netlink directives and ndo entry to control VF multicast promiscuous
> mode.
> > >
> > > Intel ixgbe and ixgbevf driver can handle only 30 multicast MAC
> > > addresses per VF. It means that we cannot assign over 30 IPv6
> > > addresses to a single VF interface on VM. We want thousands IPv6
> addresses in VM.
> > >
> > > There is capability of multicast promiscuous mode in Intel 82599 chip.
> > > It enables all multicast packets are delivered to the target VF.
> > >
> > > This patch prepares to control that VF multicast promiscuous
> functionality.
> >
> > Adding a new hook for this seems over-complicated to me.  And it still
> > doesn't solve the real problems that
> >  a) the user has to know about this limit, and
> >  b) manually configure the feature
> >
> > Most of us, lacking the ability to imagine such arbitrary hardware
> > limitations, will go through a few hours of frustrating debugging
> > before we figure this one out...
> >
> > Why can't the ixgbevf driver just automatically signal the ixgbe
> > driver to enable multicast promiscuous mode whenever the list grows
> > past the limit?
> 
> I had submitted a patch to change ixgbe and ixgbevf driver for this issue.
> https://lkml.org/lkml/2014/11/27/269
> 
> The previous patch introduces API between ixgbe and ixgbevf driver to
> enable multicast promiscuous mode, and ixgbevf enables it automatically if
> the number of addresses is over than 30.

I believe the issue is with allowing a VF to automatically enter Promiscuous Multicast without the PF's ok is concern over VM isolation.   Of course that isolation, when it comes to multicast, is rather limited anyway given that our multicast filter uses only 12-bit of the address for a match.  Still this (or doing it by default) would only open that up considerably more (all multicasts).  I assume for your application you're not concerned, but are there other use cases that would worry about such things?

Thanks,
-Don Skidmore <donald.c.skidmore@intel.com>

> 
> I got some comment and I would like to clarify the point, but there was no
> answer.
> That's the reason I submitted this patch.
> 
> Do you think a patch for the ixgbe/ixgbevf driver is preferred?
> 
> 
> thanks,
> Hiroshi
> 
> >
> > I'd also like to note that this comment in
> > drivers/net/ethernet/intel/ixgbevf/vf.c
> > indicates that the author had some ideas about how more than 30
> > addresses could/should be handled:
> >
> > static s32 ixgbevf_update_mc_addr_list_vf(struct ixgbe_hw *hw,
> > 					  struct net_device *netdev)
> > {
> > 	struct netdev_hw_addr *ha;
> > 	u32 msgbuf[IXGBE_VFMAILBOX_SIZE];
> > 	u16 *vector_list = (u16 *)&msgbuf[1];
> > 	u32 cnt, i;
> >
> > 	/* Each entry in the list uses 1 16 bit word.  We have 30
> > 	 * 16 bit words available in our HW msg buffer (minus 1 for the
> > 	 * msg type).  That's 30 hash values if we pack 'em right.  If
> > 	 * there are more than 30 MC addresses to add then punt the
> > 	 * extras for now and then add code to handle more than 30 later.
> > 	 * It would be unusual for a server to request that many multi-cast
> > 	 * addresses except for in large enterprise network environments.
> > 	 */
> >
> >
> >
> > The last 2 lines of that comment are of course totally bogus and
> > pointless and should be deleted in any case...  It's obvious that 30
> > multicast addresses is ridiculously low for lots of normal use cases.
> >
> >
> > Bjørn

ÿôèº{.nÇ+‰·Ÿ®‰­†+%ŠËÿ±éݶ\x17¥Šwÿº{.nÇ+‰·¥Š{±þG«éÿŠ{ayº\x1dʇڙë,j\a­¢f£¢·hšïêÿ‘êçz_è®\x03(­éšŽŠÝ¢j"ú\x1a¶^[m§ÿÿ¾\a«þG«éÿ¢¸?™¨è­Ú&£ø§~á¶iO•æ¬z·švØ^\x14\x04\x1a¶^[m§ÿÿÃ\fÿ¶ìÿ¢¸?–I¥

^ permalink raw reply	[flat|nested] 46+ messages in thread

* RE: [E1000-devel] [PATCH 1/2] if_link: Add VF multicast promiscuous mode control
@ 2015-01-21  0:26       ` Skidmore, Donald C
  0 siblings, 0 replies; 46+ messages in thread
From: Skidmore, Donald C @ 2015-01-21  0:26 UTC (permalink / raw)
  To: Hiroshi Shimamoto, Bjørn Mork
  Cc: e1000-devel, netdev, Choi, Sy Jong, linux-kernel, Hayato Momma



> -----Original Message-----
> From: Hiroshi Shimamoto [mailto:h-shimamoto@ct.jp.nec.com]
> Sent: Tuesday, January 20, 2015 3:40 PM
> To: Bjørn Mork
> Cc: e1000-devel@lists.sourceforge.net; netdev@vger.kernel.org; Choi, Sy
> Jong; linux-kernel@vger.kernel.org; Hayato Momma
> Subject: Re: [E1000-devel] [PATCH 1/2] if_link: Add VF multicast promiscuous
> mode control
> 
> > Subject: Re: [PATCH 1/2] if_link: Add VF multicast promiscuous mode
> > control
> >
> > Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com> writes:
> >
> > > From: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
> > >
> > > Add netlink directives and ndo entry to control VF multicast promiscuous
> mode.
> > >
> > > Intel ixgbe and ixgbevf driver can handle only 30 multicast MAC
> > > addresses per VF. It means that we cannot assign over 30 IPv6
> > > addresses to a single VF interface on VM. We want thousands IPv6
> addresses in VM.
> > >
> > > There is capability of multicast promiscuous mode in Intel 82599 chip.
> > > It enables all multicast packets are delivered to the target VF.
> > >
> > > This patch prepares to control that VF multicast promiscuous
> functionality.
> >
> > Adding a new hook for this seems over-complicated to me.  And it still
> > doesn't solve the real problems that
> >  a) the user has to know about this limit, and
> >  b) manually configure the feature
> >
> > Most of us, lacking the ability to imagine such arbitrary hardware
> > limitations, will go through a few hours of frustrating debugging
> > before we figure this one out...
> >
> > Why can't the ixgbevf driver just automatically signal the ixgbe
> > driver to enable multicast promiscuous mode whenever the list grows
> > past the limit?
> 
> I had submitted a patch to change ixgbe and ixgbevf driver for this issue.
> https://lkml.org/lkml/2014/11/27/269
> 
> The previous patch introduces API between ixgbe and ixgbevf driver to
> enable multicast promiscuous mode, and ixgbevf enables it automatically if
> the number of addresses is over than 30.

I believe the issue is with allowing a VF to automatically enter Promiscuous Multicast without the PF's ok is concern over VM isolation.   Of course that isolation, when it comes to multicast, is rather limited anyway given that our multicast filter uses only 12-bit of the address for a match.  Still this (or doing it by default) would only open that up considerably more (all multicasts).  I assume for your application you're not concerned, but are there other use cases that would worry about such things?

Thanks,
-Don Skidmore <donald.c.skidmore@intel.com>

> 
> I got some comment and I would like to clarify the point, but there was no
> answer.
> That's the reason I submitted this patch.
> 
> Do you think a patch for the ixgbe/ixgbevf driver is preferred?
> 
> 
> thanks,
> Hiroshi
> 
> >
> > I'd also like to note that this comment in
> > drivers/net/ethernet/intel/ixgbevf/vf.c
> > indicates that the author had some ideas about how more than 30
> > addresses could/should be handled:
> >
> > static s32 ixgbevf_update_mc_addr_list_vf(struct ixgbe_hw *hw,
> > 					  struct net_device *netdev)
> > {
> > 	struct netdev_hw_addr *ha;
> > 	u32 msgbuf[IXGBE_VFMAILBOX_SIZE];
> > 	u16 *vector_list = (u16 *)&msgbuf[1];
> > 	u32 cnt, i;
> >
> > 	/* Each entry in the list uses 1 16 bit word.  We have 30
> > 	 * 16 bit words available in our HW msg buffer (minus 1 for the
> > 	 * msg type).  That's 30 hash values if we pack 'em right.  If
> > 	 * there are more than 30 MC addresses to add then punt the
> > 	 * extras for now and then add code to handle more than 30 later.
> > 	 * It would be unusual for a server to request that many multi-cast
> > 	 * addresses except for in large enterprise network environments.
> > 	 */
> >
> >
> >
> > The last 2 lines of that comment are of course totally bogus and
> > pointless and should be deleted in any case...  It's obvious that 30
> > multicast addresses is ridiculously low for lots of normal use cases.
> >
> >
> > Bjørn


^ permalink raw reply	[flat|nested] 46+ messages in thread

* RE: [E1000-devel] [PATCH 1/2] if_link: Add VF multicast promiscuous mode control
  2015-01-21  0:26       ` Skidmore, Donald C
@ 2015-01-21  1:07         ` Hiroshi Shimamoto
  -1 siblings, 0 replies; 46+ messages in thread
From: Hiroshi Shimamoto @ 2015-01-21  1:07 UTC (permalink / raw)
  To: Skidmore, Donald C, Bjørn Mork
  Cc: e1000-devel, netdev, Choi, Sy Jong, linux-kernel, Hayato Momma

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: Type: text/plain; charset="utf-8", Size: 5167 bytes --]

> Subject: RE: [E1000-devel] [PATCH 1/2] if_link: Add VF multicast promiscuous mode control
> 
> 
> 
> > -----Original Message-----
> > From: Hiroshi Shimamoto [mailto:h-shimamoto@ct.jp.nec.com]
> > Sent: Tuesday, January 20, 2015 3:40 PM
> > To: Bjørn Mork
> > Cc: e1000-devel@lists.sourceforge.net; netdev@vger.kernel.org; Choi, Sy
> > Jong; linux-kernel@vger.kernel.org; Hayato Momma
> > Subject: Re: [E1000-devel] [PATCH 1/2] if_link: Add VF multicast promiscuous
> > mode control
> >
> > > Subject: Re: [PATCH 1/2] if_link: Add VF multicast promiscuous mode
> > > control
> > >
> > > Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com> writes:
> > >
> > > > From: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
> > > >
> > > > Add netlink directives and ndo entry to control VF multicast promiscuous
> > mode.
> > > >
> > > > Intel ixgbe and ixgbevf driver can handle only 30 multicast MAC
> > > > addresses per VF. It means that we cannot assign over 30 IPv6
> > > > addresses to a single VF interface on VM. We want thousands IPv6
> > addresses in VM.
> > > >
> > > > There is capability of multicast promiscuous mode in Intel 82599 chip.
> > > > It enables all multicast packets are delivered to the target VF.
> > > >
> > > > This patch prepares to control that VF multicast promiscuous
> > functionality.
> > >
> > > Adding a new hook for this seems over-complicated to me.  And it still
> > > doesn't solve the real problems that
> > >  a) the user has to know about this limit, and
> > >  b) manually configure the feature
> > >
> > > Most of us, lacking the ability to imagine such arbitrary hardware
> > > limitations, will go through a few hours of frustrating debugging
> > > before we figure this one out...
> > >
> > > Why can't the ixgbevf driver just automatically signal the ixgbe
> > > driver to enable multicast promiscuous mode whenever the list grows
> > > past the limit?
> >
> > I had submitted a patch to change ixgbe and ixgbevf driver for this issue.
> > https://lkml.org/lkml/2014/11/27/269
> >
> > The previous patch introduces API between ixgbe and ixgbevf driver to
> > enable multicast promiscuous mode, and ixgbevf enables it automatically if
> > the number of addresses is over than 30.
> 
> I believe the issue is with allowing a VF to automatically enter Promiscuous Multicast without the PF's ok is concern

So you mean that we should take care about enabling VF multicast promiscuous mode
in host side, right? The host allows multicast promiscuous and VF requests it too,
then enables VF multicast promiscuous mode.
So, what is preferred way to do in host do you think?

> over VM isolation.   Of course that isolation, when it comes to multicast, is rather limited anyway given that our multicast
> filter uses only 12-bit of the address for a match.  Still this (or doing it by default) would only open that up considerably
> more (all multicasts).  I assume for your application you're not concerned, but are there other use cases that would worry
> about such things?

Sorry I couldn't catch the point.

What is the issue? I think there is no difference for the users who don't want
many multicast addresses in guest. In the current implementation, overflowed
multicast addresses silently discarded in ixgbevf. I believe there is no user
who want to use over 30 multicast addresses now. If VF multicast promiscuous mode
is enabled in certain VF, the behavior of other VFs is not changed.

thanks,
Hiroshi

> 
> Thanks,
> -Don Skidmore <donald.c.skidmore@intel.com>
> 
> >
> > I got some comment and I would like to clarify the point, but there was no
> > answer.
> > That's the reason I submitted this patch.
> >
> > Do you think a patch for the ixgbe/ixgbevf driver is preferred?
> >
> >
> > thanks,
> > Hiroshi
> >
> > >
> > > I'd also like to note that this comment in
> > > drivers/net/ethernet/intel/ixgbevf/vf.c
> > > indicates that the author had some ideas about how more than 30
> > > addresses could/should be handled:
> > >
> > > static s32 ixgbevf_update_mc_addr_list_vf(struct ixgbe_hw *hw,
> > > 					  struct net_device *netdev)
> > > {
> > > 	struct netdev_hw_addr *ha;
> > > 	u32 msgbuf[IXGBE_VFMAILBOX_SIZE];
> > > 	u16 *vector_list = (u16 *)&msgbuf[1];
> > > 	u32 cnt, i;
> > >
> > > 	/* Each entry in the list uses 1 16 bit word.  We have 30
> > > 	 * 16 bit words available in our HW msg buffer (minus 1 for the
> > > 	 * msg type).  That's 30 hash values if we pack 'em right.  If
> > > 	 * there are more than 30 MC addresses to add then punt the
> > > 	 * extras for now and then add code to handle more than 30 later.
> > > 	 * It would be unusual for a server to request that many multi-cast
> > > 	 * addresses except for in large enterprise network environments.
> > > 	 */
> > >
> > >
> > >
> > > The last 2 lines of that comment are of course totally bogus and
> > > pointless and should be deleted in any case...  It's obvious that 30
> > > multicast addresses is ridiculously low for lots of normal use cases.
> > >
> > >
> > > Bjørn

ÿôèº{.nÇ+‰·Ÿ®‰­†+%ŠËÿ±éݶ\x17¥Šwÿº{.nÇ+‰·¥Š{±þG«éÿŠ{ayº\x1dʇڙë,j\a­¢f£¢·hšïêÿ‘êçz_è®\x03(­éšŽŠÝ¢j"ú\x1a¶^[m§ÿÿ¾\a«þG«éÿ¢¸?™¨è­Ú&£ø§~á¶iO•æ¬z·švØ^\x14\x04\x1a¶^[m§ÿÿÃ\fÿ¶ìÿ¢¸?–I¥

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH 1/2] if_link: Add VF multicast promiscuous mode control
@ 2015-01-21  1:07         ` Hiroshi Shimamoto
  0 siblings, 0 replies; 46+ messages in thread
From: Hiroshi Shimamoto @ 2015-01-21  1:07 UTC (permalink / raw)
  To: Skidmore, Donald C, Bjørn Mork
  Cc: e1000-devel, netdev, Choi, Sy Jong, Hayato Momma, linux-kernel

> Subject: RE: [E1000-devel] [PATCH 1/2] if_link: Add VF multicast promiscuous mode control
> 
> 
> 
> > -----Original Message-----
> > From: Hiroshi Shimamoto [mailto:h-shimamoto@ct.jp.nec.com]
> > Sent: Tuesday, January 20, 2015 3:40 PM
> > To: Bjørn Mork
> > Cc: e1000-devel@lists.sourceforge.net; netdev@vger.kernel.org; Choi, Sy
> > Jong; linux-kernel@vger.kernel.org; Hayato Momma
> > Subject: Re: [E1000-devel] [PATCH 1/2] if_link: Add VF multicast promiscuous
> > mode control
> >
> > > Subject: Re: [PATCH 1/2] if_link: Add VF multicast promiscuous mode
> > > control
> > >
> > > Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com> writes:
> > >
> > > > From: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
> > > >
> > > > Add netlink directives and ndo entry to control VF multicast promiscuous
> > mode.
> > > >
> > > > Intel ixgbe and ixgbevf driver can handle only 30 multicast MAC
> > > > addresses per VF. It means that we cannot assign over 30 IPv6
> > > > addresses to a single VF interface on VM. We want thousands IPv6
> > addresses in VM.
> > > >
> > > > There is capability of multicast promiscuous mode in Intel 82599 chip.
> > > > It enables all multicast packets are delivered to the target VF.
> > > >
> > > > This patch prepares to control that VF multicast promiscuous
> > functionality.
> > >
> > > Adding a new hook for this seems over-complicated to me.  And it still
> > > doesn't solve the real problems that
> > >  a) the user has to know about this limit, and
> > >  b) manually configure the feature
> > >
> > > Most of us, lacking the ability to imagine such arbitrary hardware
> > > limitations, will go through a few hours of frustrating debugging
> > > before we figure this one out...
> > >
> > > Why can't the ixgbevf driver just automatically signal the ixgbe
> > > driver to enable multicast promiscuous mode whenever the list grows
> > > past the limit?
> >
> > I had submitted a patch to change ixgbe and ixgbevf driver for this issue.
> > https://lkml.org/lkml/2014/11/27/269
> >
> > The previous patch introduces API between ixgbe and ixgbevf driver to
> > enable multicast promiscuous mode, and ixgbevf enables it automatically if
> > the number of addresses is over than 30.
> 
> I believe the issue is with allowing a VF to automatically enter Promiscuous Multicast without the PF's ok is concern

So you mean that we should take care about enabling VF multicast promiscuous mode
in host side, right? The host allows multicast promiscuous and VF requests it too,
then enables VF multicast promiscuous mode.
So, what is preferred way to do in host do you think?

> over VM isolation.   Of course that isolation, when it comes to multicast, is rather limited anyway given that our multicast
> filter uses only 12-bit of the address for a match.  Still this (or doing it by default) would only open that up considerably
> more (all multicasts).  I assume for your application you're not concerned, but are there other use cases that would worry
> about such things?

Sorry I couldn't catch the point.

What is the issue? I think there is no difference for the users who don't want
many multicast addresses in guest. In the current implementation, overflowed
multicast addresses silently discarded in ixgbevf. I believe there is no user
who want to use over 30 multicast addresses now. If VF multicast promiscuous mode
is enabled in certain VF, the behavior of other VFs is not changed.

thanks,
Hiroshi

> 
> Thanks,
> -Don Skidmore <donald.c.skidmore@intel.com>
> 
> >
> > I got some comment and I would like to clarify the point, but there was no
> > answer.
> > That's the reason I submitted this patch.
> >
> > Do you think a patch for the ixgbe/ixgbevf driver is preferred?
> >
> >
> > thanks,
> > Hiroshi
> >
> > >
> > > I'd also like to note that this comment in
> > > drivers/net/ethernet/intel/ixgbevf/vf.c
> > > indicates that the author had some ideas about how more than 30
> > > addresses could/should be handled:
> > >
> > > static s32 ixgbevf_update_mc_addr_list_vf(struct ixgbe_hw *hw,
> > > 					  struct net_device *netdev)
> > > {
> > > 	struct netdev_hw_addr *ha;
> > > 	u32 msgbuf[IXGBE_VFMAILBOX_SIZE];
> > > 	u16 *vector_list = (u16 *)&msgbuf[1];
> > > 	u32 cnt, i;
> > >
> > > 	/* Each entry in the list uses 1 16 bit word.  We have 30
> > > 	 * 16 bit words available in our HW msg buffer (minus 1 for the
> > > 	 * msg type).  That's 30 hash values if we pack 'em right.  If
> > > 	 * there are more than 30 MC addresses to add then punt the
> > > 	 * extras for now and then add code to handle more than 30 later.
> > > 	 * It would be unusual for a server to request that many multi-cast
> > > 	 * addresses except for in large enterprise network environments.
> > > 	 */
> > >
> > >
> > >
> > > The last 2 lines of that comment are of course totally bogus and
> > > pointless and should be deleted in any case...  It's obvious that 30
> > > multicast addresses is ridiculously low for lots of normal use cases.
> > >
> > >
> > > Bjørn

------------------------------------------------------------------------------
New Year. New Location. New Benefits. New Data Center in Ashburn, VA.
GigeNET is offering a free month of service with a new server in Ashburn.
Choose from 2 high performing configs, both with 100TB of bandwidth.
Higher redundancy.Lower latency.Increased capacity.Completely compliant.
http://p.sf.net/sfu/gigenet
_______________________________________________
E1000-devel mailing list
E1000-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/e1000-devel
To learn more about Intel&#174; Ethernet, visit http://communities.intel.com/community/wired

^ permalink raw reply	[flat|nested] 46+ messages in thread

* RE: [E1000-devel] [PATCH 1/2] if_link: Add VF multicast promiscuous mode control
  2015-01-21  1:07         ` Hiroshi Shimamoto
@ 2015-01-21  1:30           ` Skidmore, Donald C
  -1 siblings, 0 replies; 46+ messages in thread
From: Skidmore, Donald C @ 2015-01-21  1:30 UTC (permalink / raw)
  To: Hiroshi Shimamoto, Bjørn Mork
  Cc: e1000-devel, netdev, Choi, Sy Jong, linux-kernel, Hayato Momma

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: Type: text/plain; charset="utf-8", Size: 6513 bytes --]



> -----Original Message-----
> From: Hiroshi Shimamoto [mailto:h-shimamoto@ct.jp.nec.com]
> Sent: Tuesday, January 20, 2015 5:07 PM
> To: Skidmore, Donald C; Bjørn Mork
> Cc: e1000-devel@lists.sourceforge.net; netdev@vger.kernel.org; Choi, Sy
> Jong; linux-kernel@vger.kernel.org; Hayato Momma
> Subject: RE: [E1000-devel] [PATCH 1/2] if_link: Add VF multicast promiscuous
> mode control
> 
> > Subject: RE: [E1000-devel] [PATCH 1/2] if_link: Add VF multicast
> > promiscuous mode control
> >
> >
> >
> > > -----Original Message-----
> > > From: Hiroshi Shimamoto [mailto:h-shimamoto@ct.jp.nec.com]
> > > Sent: Tuesday, January 20, 2015 3:40 PM
> > > To: Bjørn Mork
> > > Cc: e1000-devel@lists.sourceforge.net; netdev@vger.kernel.org; Choi,
> > > Sy Jong; linux-kernel@vger.kernel.org; Hayato Momma
> > > Subject: Re: [E1000-devel] [PATCH 1/2] if_link: Add VF multicast
> > > promiscuous mode control
> > >
> > > > Subject: Re: [PATCH 1/2] if_link: Add VF multicast promiscuous
> > > > mode control
> > > >
> > > > Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com> writes:
> > > >
> > > > > From: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
> > > > >
> > > > > Add netlink directives and ndo entry to control VF multicast
> > > > > promiscuous
> > > mode.
> > > > >
> > > > > Intel ixgbe and ixgbevf driver can handle only 30 multicast MAC
> > > > > addresses per VF. It means that we cannot assign over 30 IPv6
> > > > > addresses to a single VF interface on VM. We want thousands IPv6
> > > addresses in VM.
> > > > >
> > > > > There is capability of multicast promiscuous mode in Intel 82599 chip.
> > > > > It enables all multicast packets are delivered to the target VF.
> > > > >
> > > > > This patch prepares to control that VF multicast promiscuous
> > > functionality.
> > > >
> > > > Adding a new hook for this seems over-complicated to me.  And it
> > > > still doesn't solve the real problems that
> > > >  a) the user has to know about this limit, and
> > > >  b) manually configure the feature
> > > >
> > > > Most of us, lacking the ability to imagine such arbitrary hardware
> > > > limitations, will go through a few hours of frustrating debugging
> > > > before we figure this one out...
> > > >
> > > > Why can't the ixgbevf driver just automatically signal the ixgbe
> > > > driver to enable multicast promiscuous mode whenever the list
> > > > grows past the limit?
> > >
> > > I had submitted a patch to change ixgbe and ixgbevf driver for this issue.
> > > https://lkml.org/lkml/2014/11/27/269
> > >
> > > The previous patch introduces API between ixgbe and ixgbevf driver
> > > to enable multicast promiscuous mode, and ixgbevf enables it
> > > automatically if the number of addresses is over than 30.
> >
> > I believe the issue is with allowing a VF to automatically enter
> > Promiscuous Multicast without the PF's ok is concern
> 
> So you mean that we should take care about enabling VF multicast
> promiscuous mode in host side, right? The host allows multicast promiscuous
> and VF requests it too, then enables VF multicast promiscuous mode.
> So, what is preferred way to do in host do you think?

I think we are saying the same thing.  I believe it would be fine if the VF requests this to happen (threw a mailbox message like you set up) and the PF will do it if the systems policy has been set up that way (as you did with the control mode).

This way the behavior (related to multicast) is the same as it has been, unless the system has been setup specifically to allow VF multicast promiscuous mode.

I know it's been mentioned that this is onerous on those who want this behavior to be automatic, but I don't see how else it could be done and take account for people that are concerned about allowing a (possibly untrusted) VM promoting itself in to multicast promiscuous mode.

> 
> > over VM isolation.   Of course that isolation, when it comes to multicast, is
> rather limited anyway given that our multicast
> > filter uses only 12-bit of the address for a match.  Still this (or
> > doing it by default) would only open that up considerably more (all
> > multicasts).  I assume for your application you're not concerned, but are
> there other use cases that would worry about such things?
> 
> Sorry I couldn't catch the point.
> 
> What is the issue? I think there is no difference for the users who don't want
> many multicast addresses in guest. In the current implementation,
> overflowed multicast addresses silently discarded in ixgbevf. I believe there
> is no user who want to use over 30 multicast addresses now. If VF multicast
> promiscuous mode is enabled in certain VF, the behavior of other VFs is not
> changed.
> 
> thanks,
> Hiroshi
> 
> >
> > Thanks,
> > -Don Skidmore <donald.c.skidmore@intel.com>
> >
> > >
> > > I got some comment and I would like to clarify the point, but there
> > > was no answer.
> > > That's the reason I submitted this patch.
> > >
> > > Do you think a patch for the ixgbe/ixgbevf driver is preferred?
> > >
> > >
> > > thanks,
> > > Hiroshi
> > >
> > > >
> > > > I'd also like to note that this comment in
> > > > drivers/net/ethernet/intel/ixgbevf/vf.c
> > > > indicates that the author had some ideas about how more than 30
> > > > addresses could/should be handled:
> > > >
> > > > static s32 ixgbevf_update_mc_addr_list_vf(struct ixgbe_hw *hw,
> > > > 					  struct net_device *netdev)
> > > > {
> > > > 	struct netdev_hw_addr *ha;
> > > > 	u32 msgbuf[IXGBE_VFMAILBOX_SIZE];
> > > > 	u16 *vector_list = (u16 *)&msgbuf[1];
> > > > 	u32 cnt, i;
> > > >
> > > > 	/* Each entry in the list uses 1 16 bit word.  We have 30
> > > > 	 * 16 bit words available in our HW msg buffer (minus 1 for the
> > > > 	 * msg type).  That's 30 hash values if we pack 'em right.  If
> > > > 	 * there are more than 30 MC addresses to add then punt the
> > > > 	 * extras for now and then add code to handle more than 30 later.
> > > > 	 * It would be unusual for a server to request that many multi-cast
> > > > 	 * addresses except for in large enterprise network environments.
> > > > 	 */
> > > >
> > > >
> > > >
> > > > The last 2 lines of that comment are of course totally bogus and
> > > > pointless and should be deleted in any case...  It's obvious that
> > > > 30 multicast addresses is ridiculously low for lots of normal use cases.
> > > >
> > > >
> > > > Bjørn

ÿôèº{.nÇ+‰·Ÿ®‰­†+%ŠËÿ±éݶ\x17¥Šwÿº{.nÇ+‰·¥Š{±þG«éÿŠ{ayº\x1dʇڙë,j\a­¢f£¢·hšïêÿ‘êçz_è®\x03(­éšŽŠÝ¢j"ú\x1a¶^[m§ÿÿ¾\a«þG«éÿ¢¸?™¨è­Ú&£ø§~á¶iO•æ¬z·švØ^\x14\x04\x1a¶^[m§ÿÿÃ\fÿ¶ìÿ¢¸?–I¥

^ permalink raw reply	[flat|nested] 46+ messages in thread

* RE: [E1000-devel] [PATCH 1/2] if_link: Add VF multicast promiscuous mode control
@ 2015-01-21  1:30           ` Skidmore, Donald C
  0 siblings, 0 replies; 46+ messages in thread
From: Skidmore, Donald C @ 2015-01-21  1:30 UTC (permalink / raw)
  To: Hiroshi Shimamoto, Bjørn Mork
  Cc: e1000-devel, netdev, Choi, Sy Jong, linux-kernel, Hayato Momma



> -----Original Message-----
> From: Hiroshi Shimamoto [mailto:h-shimamoto@ct.jp.nec.com]
> Sent: Tuesday, January 20, 2015 5:07 PM
> To: Skidmore, Donald C; Bjørn Mork
> Cc: e1000-devel@lists.sourceforge.net; netdev@vger.kernel.org; Choi, Sy
> Jong; linux-kernel@vger.kernel.org; Hayato Momma
> Subject: RE: [E1000-devel] [PATCH 1/2] if_link: Add VF multicast promiscuous
> mode control
> 
> > Subject: RE: [E1000-devel] [PATCH 1/2] if_link: Add VF multicast
> > promiscuous mode control
> >
> >
> >
> > > -----Original Message-----
> > > From: Hiroshi Shimamoto [mailto:h-shimamoto@ct.jp.nec.com]
> > > Sent: Tuesday, January 20, 2015 3:40 PM
> > > To: Bjørn Mork
> > > Cc: e1000-devel@lists.sourceforge.net; netdev@vger.kernel.org; Choi,
> > > Sy Jong; linux-kernel@vger.kernel.org; Hayato Momma
> > > Subject: Re: [E1000-devel] [PATCH 1/2] if_link: Add VF multicast
> > > promiscuous mode control
> > >
> > > > Subject: Re: [PATCH 1/2] if_link: Add VF multicast promiscuous
> > > > mode control
> > > >
> > > > Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com> writes:
> > > >
> > > > > From: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
> > > > >
> > > > > Add netlink directives and ndo entry to control VF multicast
> > > > > promiscuous
> > > mode.
> > > > >
> > > > > Intel ixgbe and ixgbevf driver can handle only 30 multicast MAC
> > > > > addresses per VF. It means that we cannot assign over 30 IPv6
> > > > > addresses to a single VF interface on VM. We want thousands IPv6
> > > addresses in VM.
> > > > >
> > > > > There is capability of multicast promiscuous mode in Intel 82599 chip.
> > > > > It enables all multicast packets are delivered to the target VF.
> > > > >
> > > > > This patch prepares to control that VF multicast promiscuous
> > > functionality.
> > > >
> > > > Adding a new hook for this seems over-complicated to me.  And it
> > > > still doesn't solve the real problems that
> > > >  a) the user has to know about this limit, and
> > > >  b) manually configure the feature
> > > >
> > > > Most of us, lacking the ability to imagine such arbitrary hardware
> > > > limitations, will go through a few hours of frustrating debugging
> > > > before we figure this one out...
> > > >
> > > > Why can't the ixgbevf driver just automatically signal the ixgbe
> > > > driver to enable multicast promiscuous mode whenever the list
> > > > grows past the limit?
> > >
> > > I had submitted a patch to change ixgbe and ixgbevf driver for this issue.
> > > https://lkml.org/lkml/2014/11/27/269
> > >
> > > The previous patch introduces API between ixgbe and ixgbevf driver
> > > to enable multicast promiscuous mode, and ixgbevf enables it
> > > automatically if the number of addresses is over than 30.
> >
> > I believe the issue is with allowing a VF to automatically enter
> > Promiscuous Multicast without the PF's ok is concern
> 
> So you mean that we should take care about enabling VF multicast
> promiscuous mode in host side, right? The host allows multicast promiscuous
> and VF requests it too, then enables VF multicast promiscuous mode.
> So, what is preferred way to do in host do you think?

I think we are saying the same thing.  I believe it would be fine if the VF requests this to happen (threw a mailbox message like you set up) and the PF will do it if the systems policy has been set up that way (as you did with the control mode).

This way the behavior (related to multicast) is the same as it has been, unless the system has been setup specifically to allow VF multicast promiscuous mode.

I know it's been mentioned that this is onerous on those who want this behavior to be automatic, but I don't see how else it could be done and take account for people that are concerned about allowing a (possibly untrusted) VM promoting itself in to multicast promiscuous mode.

> 
> > over VM isolation.   Of course that isolation, when it comes to multicast, is
> rather limited anyway given that our multicast
> > filter uses only 12-bit of the address for a match.  Still this (or
> > doing it by default) would only open that up considerably more (all
> > multicasts).  I assume for your application you're not concerned, but are
> there other use cases that would worry about such things?
> 
> Sorry I couldn't catch the point.
> 
> What is the issue? I think there is no difference for the users who don't want
> many multicast addresses in guest. In the current implementation,
> overflowed multicast addresses silently discarded in ixgbevf. I believe there
> is no user who want to use over 30 multicast addresses now. If VF multicast
> promiscuous mode is enabled in certain VF, the behavior of other VFs is not
> changed.
> 
> thanks,
> Hiroshi
> 
> >
> > Thanks,
> > -Don Skidmore <donald.c.skidmore@intel.com>
> >
> > >
> > > I got some comment and I would like to clarify the point, but there
> > > was no answer.
> > > That's the reason I submitted this patch.
> > >
> > > Do you think a patch for the ixgbe/ixgbevf driver is preferred?
> > >
> > >
> > > thanks,
> > > Hiroshi
> > >
> > > >
> > > > I'd also like to note that this comment in
> > > > drivers/net/ethernet/intel/ixgbevf/vf.c
> > > > indicates that the author had some ideas about how more than 30
> > > > addresses could/should be handled:
> > > >
> > > > static s32 ixgbevf_update_mc_addr_list_vf(struct ixgbe_hw *hw,
> > > > 					  struct net_device *netdev)
> > > > {
> > > > 	struct netdev_hw_addr *ha;
> > > > 	u32 msgbuf[IXGBE_VFMAILBOX_SIZE];
> > > > 	u16 *vector_list = (u16 *)&msgbuf[1];
> > > > 	u32 cnt, i;
> > > >
> > > > 	/* Each entry in the list uses 1 16 bit word.  We have 30
> > > > 	 * 16 bit words available in our HW msg buffer (minus 1 for the
> > > > 	 * msg type).  That's 30 hash values if we pack 'em right.  If
> > > > 	 * there are more than 30 MC addresses to add then punt the
> > > > 	 * extras for now and then add code to handle more than 30 later.
> > > > 	 * It would be unusual for a server to request that many multi-cast
> > > > 	 * addresses except for in large enterprise network environments.
> > > > 	 */
> > > >
> > > >
> > > >
> > > > The last 2 lines of that comment are of course totally bogus and
> > > > pointless and should be deleted in any case...  It's obvious that
> > > > 30 multicast addresses is ridiculously low for lots of normal use cases.
> > > >
> > > >
> > > > Bjørn


^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH 1/2] if_link: Add VF multicast promiscuous mode control
  2015-01-20 23:40     ` Hiroshi Shimamoto
@ 2015-01-21  9:26       ` Bjørn Mork
  -1 siblings, 0 replies; 46+ messages in thread
From: Bjørn Mork @ 2015-01-21  9:26 UTC (permalink / raw)
  To: Hiroshi Shimamoto
  Cc: Alexander Duyck, e1000-devel, netdev, Choi, Sy Jong,
	Hayato Momma, linux-kernel

Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com> writes:

>> Why can't the ixgbevf driver just automatically signal the ixgbe driver
>> to enable multicast promiscuous mode whenever the list grows past the
>> limit?
>
> I had submitted a patch to change ixgbe and ixgbevf driver for this issue.
> https://lkml.org/lkml/2014/11/27/269
>
> The previous patch introduces API between ixgbe and ixgbevf driver to
> enable multicast promiscuous mode, and ixgbevf enables it automatically
> if the number of addresses is over than 30.
>
> I got some comment and I would like to clarify the point, but there was no
> answer.
> That's the reason I submitted this patch.

Thanks.  Yes, now I understand why you want to have a policy knob.

I still think the policy could select between "automatic"/"disallowed"
instead of "enabled"/"disabled", but that's a minor detail. Likewise is
the actual implemention of "automatic".  I think you could do that
within the current VF-PF protocol by overloading the MC address "count".

But a more generic question for netdev is: Does this VF policy API
really scale?

How many different VF policy tunables can you imaging add up over a few
years and drivers.  Currently each policy flag require its own ndo hook.
I probably don't have much to say here, but IMHO this scheme had already
failed when .ndo_set_vf_spoofchk was added..


Bjørn

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH 1/2] if_link: Add VF multicast promiscuous mode control
@ 2015-01-21  9:26       ` Bjørn Mork
  0 siblings, 0 replies; 46+ messages in thread
From: Bjørn Mork @ 2015-01-21  9:26 UTC (permalink / raw)
  To: Hiroshi Shimamoto
  Cc: e1000-devel, netdev, Choi, Sy Jong, linux-kernel, Hayato Momma

Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com> writes:

>> Why can't the ixgbevf driver just automatically signal the ixgbe driver
>> to enable multicast promiscuous mode whenever the list grows past the
>> limit?
>
> I had submitted a patch to change ixgbe and ixgbevf driver for this issue.
> https://lkml.org/lkml/2014/11/27/269
>
> The previous patch introduces API between ixgbe and ixgbevf driver to
> enable multicast promiscuous mode, and ixgbevf enables it automatically
> if the number of addresses is over than 30.
>
> I got some comment and I would like to clarify the point, but there was no
> answer.
> That's the reason I submitted this patch.

Thanks.  Yes, now I understand why you want to have a policy knob.

I still think the policy could select between "automatic"/"disallowed"
instead of "enabled"/"disabled", but that's a minor detail. Likewise is
the actual implemention of "automatic".  I think you could do that
within the current VF-PF protocol by overloading the MC address "count".

But a more generic question for netdev is: Does this VF policy API
really scale?

How many different VF policy tunables can you imaging add up over a few
years and drivers.  Currently each policy flag require its own ndo hook.
I probably don't have much to say here, but IMHO this scheme had already
failed when .ndo_set_vf_spoofchk was added..


Bjørn

------------------------------------------------------------------------------
New Year. New Location. New Benefits. New Data Center in Ashburn, VA.
GigeNET is offering a free month of service with a new server in Ashburn.
Choose from 2 high performing configs, both with 100TB of bandwidth.
Higher redundancy.Lower latency.Increased capacity.Completely compliant.
http://p.sf.net/sfu/gigenet
_______________________________________________
E1000-devel mailing list
E1000-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/e1000-devel
To learn more about Intel&#174; Ethernet, visit http://communities.intel.com/community/wired

^ permalink raw reply	[flat|nested] 46+ messages in thread

* RE: [E1000-devel] [PATCH 1/2] if_link: Add VF multicast promiscuous mode control
  2015-01-21  1:30           ` Skidmore, Donald C
@ 2015-01-21 11:38             ` Hiroshi Shimamoto
  -1 siblings, 0 replies; 46+ messages in thread
From: Hiroshi Shimamoto @ 2015-01-21 11:38 UTC (permalink / raw)
  To: Skidmore, Donald C, Bjørn Mork
  Cc: e1000-devel, netdev, Choi, Sy Jong, linux-kernel, Hayato Momma

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: Type: text/plain; charset="utf-8", Size: 7582 bytes --]

> Subject: RE: [E1000-devel] [PATCH 1/2] if_link: Add VF multicast promiscuous mode control
> 
> 
> 
> > -----Original Message-----
> > From: Hiroshi Shimamoto [mailto:h-shimamoto@ct.jp.nec.com]
> > Sent: Tuesday, January 20, 2015 5:07 PM
> > To: Skidmore, Donald C; Bjørn Mork
> > Cc: e1000-devel@lists.sourceforge.net; netdev@vger.kernel.org; Choi, Sy
> > Jong; linux-kernel@vger.kernel.org; Hayato Momma
> > Subject: RE: [E1000-devel] [PATCH 1/2] if_link: Add VF multicast promiscuous
> > mode control
> >
> > > Subject: RE: [E1000-devel] [PATCH 1/2] if_link: Add VF multicast
> > > promiscuous mode control
> > >
> > >
> > >
> > > > -----Original Message-----
> > > > From: Hiroshi Shimamoto [mailto:h-shimamoto@ct.jp.nec.com]
> > > > Sent: Tuesday, January 20, 2015 3:40 PM
> > > > To: Bjørn Mork
> > > > Cc: e1000-devel@lists.sourceforge.net; netdev@vger.kernel.org; Choi,
> > > > Sy Jong; linux-kernel@vger.kernel.org; Hayato Momma
> > > > Subject: Re: [E1000-devel] [PATCH 1/2] if_link: Add VF multicast
> > > > promiscuous mode control
> > > >
> > > > > Subject: Re: [PATCH 1/2] if_link: Add VF multicast promiscuous
> > > > > mode control
> > > > >
> > > > > Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com> writes:
> > > > >
> > > > > > From: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
> > > > > >
> > > > > > Add netlink directives and ndo entry to control VF multicast
> > > > > > promiscuous
> > > > mode.
> > > > > >
> > > > > > Intel ixgbe and ixgbevf driver can handle only 30 multicast MAC
> > > > > > addresses per VF. It means that we cannot assign over 30 IPv6
> > > > > > addresses to a single VF interface on VM. We want thousands IPv6
> > > > addresses in VM.
> > > > > >
> > > > > > There is capability of multicast promiscuous mode in Intel 82599 chip.
> > > > > > It enables all multicast packets are delivered to the target VF.
> > > > > >
> > > > > > This patch prepares to control that VF multicast promiscuous
> > > > functionality.
> > > > >
> > > > > Adding a new hook for this seems over-complicated to me.  And it
> > > > > still doesn't solve the real problems that
> > > > >  a) the user has to know about this limit, and
> > > > >  b) manually configure the feature
> > > > >
> > > > > Most of us, lacking the ability to imagine such arbitrary hardware
> > > > > limitations, will go through a few hours of frustrating debugging
> > > > > before we figure this one out...
> > > > >
> > > > > Why can't the ixgbevf driver just automatically signal the ixgbe
> > > > > driver to enable multicast promiscuous mode whenever the list
> > > > > grows past the limit?
> > > >
> > > > I had submitted a patch to change ixgbe and ixgbevf driver for this issue.
> > > > https://lkml.org/lkml/2014/11/27/269
> > > >
> > > > The previous patch introduces API between ixgbe and ixgbevf driver
> > > > to enable multicast promiscuous mode, and ixgbevf enables it
> > > > automatically if the number of addresses is over than 30.
> > >
> > > I believe the issue is with allowing a VF to automatically enter
> > > Promiscuous Multicast without the PF's ok is concern
> >
> > So you mean that we should take care about enabling VF multicast
> > promiscuous mode in host side, right? The host allows multicast promiscuous
> > and VF requests it too, then enables VF multicast promiscuous mode.
> > So, what is preferred way to do in host do you think?
> 
> I think we are saying the same thing.  I believe it would be fine if the VF requests this to happen (threw a mailbox message
> like you set up) and the PF will do it if the systems policy has been set up that way (as you did with the control mode).
> 
> This way the behavior (related to multicast) is the same as it has been, unless the system has been setup specifically
> to allow VF multicast promiscuous mode.

Now I understand what you're saying.
Will make patches that make knob in host/PF and ixgbe/ixgbevf interface.
I think I should try to find whether there is a way to make the know without ndo.

> 
> I know it's been mentioned that this is onerous on those who want this behavior to be automatic, but I don't see how else
> it could be done and take account for people that are concerned about allowing a (possibly untrusted) VM promoting itself
> in to multicast promiscuous mode.

I didn't mind its onerousness so much.
My concern is what is the real issue that VF multicast promiscuous mode can cause.
I think there is the 4k entries to filter multicast address, and the current ixgbe/ixgbevf
can turn all bits on from VM. That is almost same as enabling multicast promiscuous mode.
I mean that we can receive all multicast addresses by an onerous operation in untrusted VM.
I think we should clarify what is real security issue in this context.

thanks,
Hiroshi

> 
> >
> > > over VM isolation.   Of course that isolation, when it comes to multicast, is
> > rather limited anyway given that our multicast
> > > filter uses only 12-bit of the address for a match.  Still this (or
> > > doing it by default) would only open that up considerably more (all
> > > multicasts).  I assume for your application you're not concerned, but are
> > there other use cases that would worry about such things?
> >
> > Sorry I couldn't catch the point.
> >
> > What is the issue? I think there is no difference for the users who don't want
> > many multicast addresses in guest. In the current implementation,
> > overflowed multicast addresses silently discarded in ixgbevf. I believe there
> > is no user who want to use over 30 multicast addresses now. If VF multicast
> > promiscuous mode is enabled in certain VF, the behavior of other VFs is not
> > changed.
> >
> > thanks,
> > Hiroshi
> >
> > >
> > > Thanks,
> > > -Don Skidmore <donald.c.skidmore@intel.com>
> > >
> > > >
> > > > I got some comment and I would like to clarify the point, but there
> > > > was no answer.
> > > > That's the reason I submitted this patch.
> > > >
> > > > Do you think a patch for the ixgbe/ixgbevf driver is preferred?
> > > >
> > > >
> > > > thanks,
> > > > Hiroshi
> > > >
> > > > >
> > > > > I'd also like to note that this comment in
> > > > > drivers/net/ethernet/intel/ixgbevf/vf.c
> > > > > indicates that the author had some ideas about how more than 30
> > > > > addresses could/should be handled:
> > > > >
> > > > > static s32 ixgbevf_update_mc_addr_list_vf(struct ixgbe_hw *hw,
> > > > > 					  struct net_device *netdev)
> > > > > {
> > > > > 	struct netdev_hw_addr *ha;
> > > > > 	u32 msgbuf[IXGBE_VFMAILBOX_SIZE];
> > > > > 	u16 *vector_list = (u16 *)&msgbuf[1];
> > > > > 	u32 cnt, i;
> > > > >
> > > > > 	/* Each entry in the list uses 1 16 bit word.  We have 30
> > > > > 	 * 16 bit words available in our HW msg buffer (minus 1 for the
> > > > > 	 * msg type).  That's 30 hash values if we pack 'em right.  If
> > > > > 	 * there are more than 30 MC addresses to add then punt the
> > > > > 	 * extras for now and then add code to handle more than 30 later.
> > > > > 	 * It would be unusual for a server to request that many multi-cast
> > > > > 	 * addresses except for in large enterprise network environments.
> > > > > 	 */
> > > > >
> > > > >
> > > > >
> > > > > The last 2 lines of that comment are of course totally bogus and
> > > > > pointless and should be deleted in any case...  It's obvious that
> > > > > 30 multicast addresses is ridiculously low for lots of normal use cases.
> > > > >
> > > > >
> > > > > Bjørn

ÿôèº{.nÇ+‰·Ÿ®‰­†+%ŠËÿ±éݶ\x17¥Šwÿº{.nÇ+‰·¥Š{±þG«éÿŠ{ayº\x1dʇڙë,j\a­¢f£¢·hšïêÿ‘êçz_è®\x03(­éšŽŠÝ¢j"ú\x1a¶^[m§ÿÿ¾\a«þG«éÿ¢¸?™¨è­Ú&£ø§~á¶iO•æ¬z·švØ^\x14\x04\x1a¶^[m§ÿÿÃ\fÿ¶ìÿ¢¸?–I¥

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH 1/2] if_link: Add VF multicast promiscuous mode control
@ 2015-01-21 11:38             ` Hiroshi Shimamoto
  0 siblings, 0 replies; 46+ messages in thread
From: Hiroshi Shimamoto @ 2015-01-21 11:38 UTC (permalink / raw)
  To: Skidmore, Donald C, Bjørn Mork
  Cc: e1000-devel, netdev, Choi, Sy Jong, Hayato Momma, linux-kernel

> Subject: RE: [E1000-devel] [PATCH 1/2] if_link: Add VF multicast promiscuous mode control
> 
> 
> 
> > -----Original Message-----
> > From: Hiroshi Shimamoto [mailto:h-shimamoto@ct.jp.nec.com]
> > Sent: Tuesday, January 20, 2015 5:07 PM
> > To: Skidmore, Donald C; Bjørn Mork
> > Cc: e1000-devel@lists.sourceforge.net; netdev@vger.kernel.org; Choi, Sy
> > Jong; linux-kernel@vger.kernel.org; Hayato Momma
> > Subject: RE: [E1000-devel] [PATCH 1/2] if_link: Add VF multicast promiscuous
> > mode control
> >
> > > Subject: RE: [E1000-devel] [PATCH 1/2] if_link: Add VF multicast
> > > promiscuous mode control
> > >
> > >
> > >
> > > > -----Original Message-----
> > > > From: Hiroshi Shimamoto [mailto:h-shimamoto@ct.jp.nec.com]
> > > > Sent: Tuesday, January 20, 2015 3:40 PM
> > > > To: Bjørn Mork
> > > > Cc: e1000-devel@lists.sourceforge.net; netdev@vger.kernel.org; Choi,
> > > > Sy Jong; linux-kernel@vger.kernel.org; Hayato Momma
> > > > Subject: Re: [E1000-devel] [PATCH 1/2] if_link: Add VF multicast
> > > > promiscuous mode control
> > > >
> > > > > Subject: Re: [PATCH 1/2] if_link: Add VF multicast promiscuous
> > > > > mode control
> > > > >
> > > > > Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com> writes:
> > > > >
> > > > > > From: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
> > > > > >
> > > > > > Add netlink directives and ndo entry to control VF multicast
> > > > > > promiscuous
> > > > mode.
> > > > > >
> > > > > > Intel ixgbe and ixgbevf driver can handle only 30 multicast MAC
> > > > > > addresses per VF. It means that we cannot assign over 30 IPv6
> > > > > > addresses to a single VF interface on VM. We want thousands IPv6
> > > > addresses in VM.
> > > > > >
> > > > > > There is capability of multicast promiscuous mode in Intel 82599 chip.
> > > > > > It enables all multicast packets are delivered to the target VF.
> > > > > >
> > > > > > This patch prepares to control that VF multicast promiscuous
> > > > functionality.
> > > > >
> > > > > Adding a new hook for this seems over-complicated to me.  And it
> > > > > still doesn't solve the real problems that
> > > > >  a) the user has to know about this limit, and
> > > > >  b) manually configure the feature
> > > > >
> > > > > Most of us, lacking the ability to imagine such arbitrary hardware
> > > > > limitations, will go through a few hours of frustrating debugging
> > > > > before we figure this one out...
> > > > >
> > > > > Why can't the ixgbevf driver just automatically signal the ixgbe
> > > > > driver to enable multicast promiscuous mode whenever the list
> > > > > grows past the limit?
> > > >
> > > > I had submitted a patch to change ixgbe and ixgbevf driver for this issue.
> > > > https://lkml.org/lkml/2014/11/27/269
> > > >
> > > > The previous patch introduces API between ixgbe and ixgbevf driver
> > > > to enable multicast promiscuous mode, and ixgbevf enables it
> > > > automatically if the number of addresses is over than 30.
> > >
> > > I believe the issue is with allowing a VF to automatically enter
> > > Promiscuous Multicast without the PF's ok is concern
> >
> > So you mean that we should take care about enabling VF multicast
> > promiscuous mode in host side, right? The host allows multicast promiscuous
> > and VF requests it too, then enables VF multicast promiscuous mode.
> > So, what is preferred way to do in host do you think?
> 
> I think we are saying the same thing.  I believe it would be fine if the VF requests this to happen (threw a mailbox message
> like you set up) and the PF will do it if the systems policy has been set up that way (as you did with the control mode).
> 
> This way the behavior (related to multicast) is the same as it has been, unless the system has been setup specifically
> to allow VF multicast promiscuous mode.

Now I understand what you're saying.
Will make patches that make knob in host/PF and ixgbe/ixgbevf interface.
I think I should try to find whether there is a way to make the know without ndo.

> 
> I know it's been mentioned that this is onerous on those who want this behavior to be automatic, but I don't see how else
> it could be done and take account for people that are concerned about allowing a (possibly untrusted) VM promoting itself
> in to multicast promiscuous mode.

I didn't mind its onerousness so much.
My concern is what is the real issue that VF multicast promiscuous mode can cause.
I think there is the 4k entries to filter multicast address, and the current ixgbe/ixgbevf
can turn all bits on from VM. That is almost same as enabling multicast promiscuous mode.
I mean that we can receive all multicast addresses by an onerous operation in untrusted VM.
I think we should clarify what is real security issue in this context.

thanks,
Hiroshi

> 
> >
> > > over VM isolation.   Of course that isolation, when it comes to multicast, is
> > rather limited anyway given that our multicast
> > > filter uses only 12-bit of the address for a match.  Still this (or
> > > doing it by default) would only open that up considerably more (all
> > > multicasts).  I assume for your application you're not concerned, but are
> > there other use cases that would worry about such things?
> >
> > Sorry I couldn't catch the point.
> >
> > What is the issue? I think there is no difference for the users who don't want
> > many multicast addresses in guest. In the current implementation,
> > overflowed multicast addresses silently discarded in ixgbevf. I believe there
> > is no user who want to use over 30 multicast addresses now. If VF multicast
> > promiscuous mode is enabled in certain VF, the behavior of other VFs is not
> > changed.
> >
> > thanks,
> > Hiroshi
> >
> > >
> > > Thanks,
> > > -Don Skidmore <donald.c.skidmore@intel.com>
> > >
> > > >
> > > > I got some comment and I would like to clarify the point, but there
> > > > was no answer.
> > > > That's the reason I submitted this patch.
> > > >
> > > > Do you think a patch for the ixgbe/ixgbevf driver is preferred?
> > > >
> > > >
> > > > thanks,
> > > > Hiroshi
> > > >
> > > > >
> > > > > I'd also like to note that this comment in
> > > > > drivers/net/ethernet/intel/ixgbevf/vf.c
> > > > > indicates that the author had some ideas about how more than 30
> > > > > addresses could/should be handled:
> > > > >
> > > > > static s32 ixgbevf_update_mc_addr_list_vf(struct ixgbe_hw *hw,
> > > > > 					  struct net_device *netdev)
> > > > > {
> > > > > 	struct netdev_hw_addr *ha;
> > > > > 	u32 msgbuf[IXGBE_VFMAILBOX_SIZE];
> > > > > 	u16 *vector_list = (u16 *)&msgbuf[1];
> > > > > 	u32 cnt, i;
> > > > >
> > > > > 	/* Each entry in the list uses 1 16 bit word.  We have 30
> > > > > 	 * 16 bit words available in our HW msg buffer (minus 1 for the
> > > > > 	 * msg type).  That's 30 hash values if we pack 'em right.  If
> > > > > 	 * there are more than 30 MC addresses to add then punt the
> > > > > 	 * extras for now and then add code to handle more than 30 later.
> > > > > 	 * It would be unusual for a server to request that many multi-cast
> > > > > 	 * addresses except for in large enterprise network environments.
> > > > > 	 */
> > > > >
> > > > >
> > > > >
> > > > > The last 2 lines of that comment are of course totally bogus and
> > > > > pointless and should be deleted in any case...  It's obvious that
> > > > > 30 multicast addresses is ridiculously low for lots of normal use cases.
> > > > >
> > > > >
> > > > > Bjørn

------------------------------------------------------------------------------
New Year. New Location. New Benefits. New Data Center in Ashburn, VA.
GigeNET is offering a free month of service with a new server in Ashburn.
Choose from 2 high performing configs, both with 100TB of bandwidth.
Higher redundancy.Lower latency.Increased capacity.Completely compliant.
http://p.sf.net/sfu/gigenet
_______________________________________________
E1000-devel mailing list
E1000-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/e1000-devel
To learn more about Intel&#174; Ethernet, visit http://communities.intel.com/community/wired

^ permalink raw reply	[flat|nested] 46+ messages in thread

* RE: [E1000-devel] [PATCH 1/2] if_link: Add VF multicast promiscuous mode control
  2015-01-21 11:38             ` Hiroshi Shimamoto
@ 2015-01-21 11:56               ` David Laight
  -1 siblings, 0 replies; 46+ messages in thread
From: David Laight @ 2015-01-21 11:56 UTC (permalink / raw)
  To: 'Hiroshi Shimamoto', Skidmore, Donald C, Bjørn Mork
  Cc: e1000-devel, netdev, Choi, Sy Jong, linux-kernel, Hayato Momma

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: Type: text/plain; charset="utf-8", Size: 1000 bytes --]

From: Hiroshi Shimamoto
> My concern is what is the real issue that VF multicast promiscuous mode can cause.
> I think there is the 4k entries to filter multicast address, and the current ixgbe/ixgbevf
> can turn all bits on from VM. That is almost same as enabling multicast promiscuous mode.
> I mean that we can receive all multicast addresses by an onerous operation in untrusted VM.
> I think we should clarify what is real security issue in this context.

If you are worried about passing un-enabled multicasts to users then
what about doing a software hash of received multicasts and checking
against an actual list of multicasts enabled for that hash entry.
Under normal conditions there is likely to be only a single address to check.

It may (or may not) be best to use the same hash as any hashing hardware
filter uses.

	David

ÿôèº{.nÇ+‰·Ÿ®‰­†+%ŠËÿ±éݶ\x17¥Šwÿº{.nÇ+‰·¥Š{±þG«éÿŠ{ayº\x1dʇڙë,j\a­¢f£¢·hšïêÿ‘êçz_è®\x03(­éšŽŠÝ¢j"ú\x1a¶^[m§ÿÿ¾\a«þG«éÿ¢¸?™¨è­Ú&£ø§~á¶iO•æ¬z·švØ^\x14\x04\x1a¶^[m§ÿÿÃ\fÿ¶ìÿ¢¸?–I¥

^ permalink raw reply	[flat|nested] 46+ messages in thread

* RE: [E1000-devel] [PATCH 1/2] if_link: Add VF multicast promiscuous mode control
@ 2015-01-21 11:56               ` David Laight
  0 siblings, 0 replies; 46+ messages in thread
From: David Laight @ 2015-01-21 11:56 UTC (permalink / raw)
  To: 'Hiroshi Shimamoto', Skidmore, Donald C, Bjørn Mork
  Cc: e1000-devel, netdev, Choi, Sy Jong, linux-kernel, Hayato Momma

From: Hiroshi Shimamoto
> My concern is what is the real issue that VF multicast promiscuous mode can cause.
> I think there is the 4k entries to filter multicast address, and the current ixgbe/ixgbevf
> can turn all bits on from VM. That is almost same as enabling multicast promiscuous mode.
> I mean that we can receive all multicast addresses by an onerous operation in untrusted VM.
> I think we should clarify what is real security issue in this context.

If you are worried about passing un-enabled multicasts to users then
what about doing a software hash of received multicasts and checking
against an actual list of multicasts enabled for that hash entry.
Under normal conditions there is likely to be only a single address to check.

It may (or may not) be best to use the same hash as any hashing hardware
filter uses.

	David


^ permalink raw reply	[flat|nested] 46+ messages in thread

* RE: [E1000-devel] [PATCH 1/2] if_link: Add VF multicast promiscuous mode control
  2015-01-21 11:56               ` David Laight
@ 2015-01-21 12:18                 ` Hiroshi Shimamoto
  -1 siblings, 0 replies; 46+ messages in thread
From: Hiroshi Shimamoto @ 2015-01-21 12:18 UTC (permalink / raw)
  To: David Laight, Skidmore, Donald C, Bjørn Mork
  Cc: e1000-devel, netdev, Choi, Sy Jong, linux-kernel, Hayato Momma

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: Type: text/plain; charset="utf-8", Size: 1574 bytes --]

> Subject: RE: [E1000-devel] [PATCH 1/2] if_link: Add VF multicast promiscuous mode control
> 
> From: Hiroshi Shimamoto
> > My concern is what is the real issue that VF multicast promiscuous mode can cause.
> > I think there is the 4k entries to filter multicast address, and the current ixgbe/ixgbevf
> > can turn all bits on from VM. That is almost same as enabling multicast promiscuous mode.
> > I mean that we can receive all multicast addresses by an onerous operation in untrusted VM.
> > I think we should clarify what is real security issue in this context.
> 
> If you are worried about passing un-enabled multicasts to users then
> what about doing a software hash of received multicasts and checking
> against an actual list of multicasts enabled for that hash entry.
> Under normal conditions there is likely to be only a single address to check.
> 
> It may (or may not) be best to use the same hash as any hashing hardware
> filter uses.

thanks for the comment. But I don't think that is the point.

I guess, introducing VF multicast promiscuous mode seems to add new privilege
to peek every multicast packet in VM and that doesn't look good.
On the other hand, I think that there has been the same privilege in the current
ixgbe/ixgbevf implementation already. Or I'm reading the code wrongly.
I'd like to clarify what is the issue of allowing to receive all multicast packets.

thanks,
Hiroshi

ÿôèº{.nÇ+‰·Ÿ®‰­†+%ŠËÿ±éݶ\x17¥Šwÿº{.nÇ+‰·¥Š{±þG«éÿŠ{ayº\x1dʇڙë,j\a­¢f£¢·hšïêÿ‘êçz_è®\x03(­éšŽŠÝ¢j"ú\x1a¶^[m§ÿÿ¾\a«þG«éÿ¢¸?™¨è­Ú&£ø§~á¶iO•æ¬z·švØ^\x14\x04\x1a¶^[m§ÿÿÃ\fÿ¶ìÿ¢¸?–I¥

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH 1/2] if_link: Add VF multicast promiscuous mode control
@ 2015-01-21 12:18                 ` Hiroshi Shimamoto
  0 siblings, 0 replies; 46+ messages in thread
From: Hiroshi Shimamoto @ 2015-01-21 12:18 UTC (permalink / raw)
  To: David Laight, Skidmore, Donald C, Bjørn Mork
  Cc: e1000-devel, netdev, Choi, Sy Jong, Hayato Momma, linux-kernel

> Subject: RE: [E1000-devel] [PATCH 1/2] if_link: Add VF multicast promiscuous mode control
> 
> From: Hiroshi Shimamoto
> > My concern is what is the real issue that VF multicast promiscuous mode can cause.
> > I think there is the 4k entries to filter multicast address, and the current ixgbe/ixgbevf
> > can turn all bits on from VM. That is almost same as enabling multicast promiscuous mode.
> > I mean that we can receive all multicast addresses by an onerous operation in untrusted VM.
> > I think we should clarify what is real security issue in this context.
> 
> If you are worried about passing un-enabled multicasts to users then
> what about doing a software hash of received multicasts and checking
> against an actual list of multicasts enabled for that hash entry.
> Under normal conditions there is likely to be only a single address to check.
> 
> It may (or may not) be best to use the same hash as any hashing hardware
> filter uses.

thanks for the comment. But I don't think that is the point.

I guess, introducing VF multicast promiscuous mode seems to add new privilege
to peek every multicast packet in VM and that doesn't look good.
On the other hand, I think that there has been the same privilege in the current
ixgbe/ixgbevf implementation already. Or I'm reading the code wrongly.
I'd like to clarify what is the issue of allowing to receive all multicast packets.

thanks,
Hiroshi

------------------------------------------------------------------------------
New Year. New Location. New Benefits. New Data Center in Ashburn, VA.
GigeNET is offering a free month of service with a new server in Ashburn.
Choose from 2 high performing configs, both with 100TB of bandwidth.
Higher redundancy.Lower latency.Increased capacity.Completely compliant.
http://p.sf.net/sfu/gigenet
_______________________________________________
E1000-devel mailing list
E1000-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/e1000-devel
To learn more about Intel&#174; Ethernet, visit http://communities.intel.com/community/wired

^ permalink raw reply	[flat|nested] 46+ messages in thread

* RE: [E1000-devel] [PATCH 1/2] if_link: Add VF multicast promiscuous mode control
  2015-01-21 12:18                 ` Hiroshi Shimamoto
@ 2015-01-21 20:27                   ` Skidmore, Donald C
  -1 siblings, 0 replies; 46+ messages in thread
From: Skidmore, Donald C @ 2015-01-21 20:27 UTC (permalink / raw)
  To: Hiroshi Shimamoto, David Laight, Bjørn Mork
  Cc: e1000-devel, netdev, Choi, Sy Jong, linux-kernel, Hayato Momma

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: Type: text/plain; charset="utf-8", Size: 2655 bytes --]



> -----Original Message-----
> From: Hiroshi Shimamoto [mailto:h-shimamoto@ct.jp.nec.com]
> Sent: Wednesday, January 21, 2015 4:18 AM
> To: David Laight; Skidmore, Donald C; Bjørn Mork
> Cc: e1000-devel@lists.sourceforge.net; netdev@vger.kernel.org; Choi, Sy
> Jong; linux-kernel@vger.kernel.org; Hayato Momma
> Subject: RE: [E1000-devel] [PATCH 1/2] if_link: Add VF multicast promiscuous
> mode control
> 
> > Subject: RE: [E1000-devel] [PATCH 1/2] if_link: Add VF multicast
> > promiscuous mode control
> >
> > From: Hiroshi Shimamoto
> > > My concern is what is the real issue that VF multicast promiscuous mode
> can cause.
> > > I think there is the 4k entries to filter multicast address, and the
> > > current ixgbe/ixgbevf can turn all bits on from VM. That is almost same as
> enabling multicast promiscuous mode.
> > > I mean that we can receive all multicast addresses by an onerous
> operation in untrusted VM.
> > > I think we should clarify what is real security issue in this context.
> >
> > If you are worried about passing un-enabled multicasts to users then
> > what about doing a software hash of received multicasts and checking
> > against an actual list of multicasts enabled for that hash entry.
> > Under normal conditions there is likely to be only a single address to check.
> >
> > It may (or may not) be best to use the same hash as any hashing
> > hardware filter uses.
> 
> thanks for the comment. But I don't think that is the point.
> 
> I guess, introducing VF multicast promiscuous mode seems to add new
> privilege to peek every multicast packet in VM and that doesn't look good.
> On the other hand, I think that there has been the same privilege in the
> current ixgbe/ixgbevf implementation already. Or I'm reading the code
> wrongly.
> I'd like to clarify what is the issue of allowing to receive all multicast packets.

Allowing a VM to give itself the privilege of seeing every multicast packet could be seen as a hole in VM isolation.  Now if the host system allows this policy I don't see this as an issue as someone specifically allowed this to happen and then must not be concerned.  We could even log that it has occurred, which I believe your patch did do.  The issue is also further muddied, as you mentioned above, since some of these multicast packets are leaking anyway (the HW currently uses a 12 bit mask).  It's just that this change would greatly enlarge that hole from a fraction to all multicast packets.    

> 
> thanks,
> Hiroshi

ÿôèº{.nÇ+‰·Ÿ®‰­†+%ŠËÿ±éݶ\x17¥Šwÿº{.nÇ+‰·¥Š{±þG«éÿŠ{ayº\x1dʇڙë,j\a­¢f£¢·hšïêÿ‘êçz_è®\x03(­éšŽŠÝ¢j"ú\x1a¶^[m§ÿÿ¾\a«þG«éÿ¢¸?™¨è­Ú&£ø§~á¶iO•æ¬z·švØ^\x14\x04\x1a¶^[m§ÿÿÃ\fÿ¶ìÿ¢¸?–I¥

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH 1/2] if_link: Add VF multicast promiscuous mode control
@ 2015-01-21 20:27                   ` Skidmore, Donald C
  0 siblings, 0 replies; 46+ messages in thread
From: Skidmore, Donald C @ 2015-01-21 20:27 UTC (permalink / raw)
  To: Hiroshi Shimamoto, David Laight, Bjørn Mork
  Cc: e1000-devel, netdev, Choi, Sy Jong, Hayato Momma, linux-kernel



> -----Original Message-----
> From: Hiroshi Shimamoto [mailto:h-shimamoto@ct.jp.nec.com]
> Sent: Wednesday, January 21, 2015 4:18 AM
> To: David Laight; Skidmore, Donald C; Bjørn Mork
> Cc: e1000-devel@lists.sourceforge.net; netdev@vger.kernel.org; Choi, Sy
> Jong; linux-kernel@vger.kernel.org; Hayato Momma
> Subject: RE: [E1000-devel] [PATCH 1/2] if_link: Add VF multicast promiscuous
> mode control
> 
> > Subject: RE: [E1000-devel] [PATCH 1/2] if_link: Add VF multicast
> > promiscuous mode control
> >
> > From: Hiroshi Shimamoto
> > > My concern is what is the real issue that VF multicast promiscuous mode
> can cause.
> > > I think there is the 4k entries to filter multicast address, and the
> > > current ixgbe/ixgbevf can turn all bits on from VM. That is almost same as
> enabling multicast promiscuous mode.
> > > I mean that we can receive all multicast addresses by an onerous
> operation in untrusted VM.
> > > I think we should clarify what is real security issue in this context.
> >
> > If you are worried about passing un-enabled multicasts to users then
> > what about doing a software hash of received multicasts and checking
> > against an actual list of multicasts enabled for that hash entry.
> > Under normal conditions there is likely to be only a single address to check.
> >
> > It may (or may not) be best to use the same hash as any hashing
> > hardware filter uses.
> 
> thanks for the comment. But I don't think that is the point.
> 
> I guess, introducing VF multicast promiscuous mode seems to add new
> privilege to peek every multicast packet in VM and that doesn't look good.
> On the other hand, I think that there has been the same privilege in the
> current ixgbe/ixgbevf implementation already. Or I'm reading the code
> wrongly.
> I'd like to clarify what is the issue of allowing to receive all multicast packets.

Allowing a VM to give itself the privilege of seeing every multicast packet could be seen as a hole in VM isolation.  Now if the host system allows this policy I don't see this as an issue as someone specifically allowed this to happen and then must not be concerned.  We could even log that it has occurred, which I believe your patch did do.  The issue is also further muddied, as you mentioned above, since some of these multicast packets are leaking anyway (the HW currently uses a 12 bit mask).  It's just that this change would greatly enlarge that hole from a fraction to all multicast packets.    

> 
> thanks,
> Hiroshi

------------------------------------------------------------------------------
New Year. New Location. New Benefits. New Data Center in Ashburn, VA.
GigeNET is offering a free month of service with a new server in Ashburn.
Choose from 2 high performing configs, both with 100TB of bandwidth.
Higher redundancy.Lower latency.Increased capacity.Completely compliant.
http://p.sf.net/sfu/gigenet
_______________________________________________
E1000-devel mailing list
E1000-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/e1000-devel
To learn more about Intel&#174; Ethernet, visit http://communities.intel.com/community/wired

^ permalink raw reply	[flat|nested] 46+ messages in thread

* RE: [E1000-devel] [PATCH 1/2] if_link: Add VF multicast promiscuous mode control
  2015-01-21 20:27                   ` Skidmore, Donald C
@ 2015-01-22  9:50                     ` David Laight
  -1 siblings, 0 replies; 46+ messages in thread
From: David Laight @ 2015-01-22  9:50 UTC (permalink / raw)
  To: 'Skidmore, Donald C', Hiroshi Shimamoto, Bjørn Mork
  Cc: e1000-devel, netdev, Choi, Sy Jong, linux-kernel, Hayato Momma

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: Type: text/plain; charset="utf-8", Size: 2350 bytes --]

From: Skidmore, Donald C 
> > > From: Hiroshi Shimamoto
> > > > My concern is what is the real issue that VF multicast promiscuous mode
> > can cause.
> > > > I think there is the 4k entries to filter multicast address, and the
> > > > current ixgbe/ixgbevf can turn all bits on from VM. That is almost same as
> > enabling multicast promiscuous mode.
> > > > I mean that we can receive all multicast addresses by an onerous
> > operation in untrusted VM.
> > > > I think we should clarify what is real security issue in this context.
> > >
> > > If you are worried about passing un-enabled multicasts to users then
> > > what about doing a software hash of received multicasts and checking
> > > against an actual list of multicasts enabled for that hash entry.
> > > Under normal conditions there is likely to be only a single address to check.
> > >
> > > It may (or may not) be best to use the same hash as any hashing
> > > hardware filter uses.
> >
> > thanks for the comment. But I don't think that is the point.
> >
> > I guess, introducing VF multicast promiscuous mode seems to add new
> > privilege to peek every multicast packet in VM and that doesn't look good.
> > On the other hand, I think that there has been the same privilege in the
> > current ixgbe/ixgbevf implementation already. Or I'm reading the code
> > wrongly.
> > I'd like to clarify what is the issue of allowing to receive all multicast packets.
> 
> Allowing a VM to give itself the privilege of seeing every multicast packet
> could be seen as a hole in VM isolation.
> Now if the host system allows this policy I don't see this as an issue as
> someone specifically allowed this to happen and then must not be concerned.
> We could even log that it has occurred, which I believe your patch did do.
> The issue is also further muddied, as you mentioned above, since some of
> these multicast packets are leaking anyway (the HW currently uses a 12 bit mask).
> It's just that this change would greatly enlarge that hole from a fraction to
> all multicast packets.

Why does it have anything to do with VM isolation?
Isn't is just the same as if the VM were connected directly to the
ethernet cable?

	David

ÿôèº{.nÇ+‰·Ÿ®‰­†+%ŠËÿ±éݶ\x17¥Šwÿº{.nÇ+‰·¥Š{±þG«éÿŠ{ayº\x1dʇڙë,j\a­¢f£¢·hšïêÿ‘êçz_è®\x03(­éšŽŠÝ¢j"ú\x1a¶^[m§ÿÿ¾\a«þG«éÿ¢¸?™¨è­Ú&£ø§~á¶iO•æ¬z·švØ^\x14\x04\x1a¶^[m§ÿÿÃ\fÿ¶ìÿ¢¸?–I¥

^ permalink raw reply	[flat|nested] 46+ messages in thread

* RE: [E1000-devel] [PATCH 1/2] if_link: Add VF multicast promiscuous mode control
@ 2015-01-22  9:50                     ` David Laight
  0 siblings, 0 replies; 46+ messages in thread
From: David Laight @ 2015-01-22  9:50 UTC (permalink / raw)
  To: 'Skidmore, Donald C', Hiroshi Shimamoto, Bjørn Mork
  Cc: e1000-devel, netdev, Choi, Sy Jong, linux-kernel, Hayato Momma

From: Skidmore, Donald C 
> > > From: Hiroshi Shimamoto
> > > > My concern is what is the real issue that VF multicast promiscuous mode
> > can cause.
> > > > I think there is the 4k entries to filter multicast address, and the
> > > > current ixgbe/ixgbevf can turn all bits on from VM. That is almost same as
> > enabling multicast promiscuous mode.
> > > > I mean that we can receive all multicast addresses by an onerous
> > operation in untrusted VM.
> > > > I think we should clarify what is real security issue in this context.
> > >
> > > If you are worried about passing un-enabled multicasts to users then
> > > what about doing a software hash of received multicasts and checking
> > > against an actual list of multicasts enabled for that hash entry.
> > > Under normal conditions there is likely to be only a single address to check.
> > >
> > > It may (or may not) be best to use the same hash as any hashing
> > > hardware filter uses.
> >
> > thanks for the comment. But I don't think that is the point.
> >
> > I guess, introducing VF multicast promiscuous mode seems to add new
> > privilege to peek every multicast packet in VM and that doesn't look good.
> > On the other hand, I think that there has been the same privilege in the
> > current ixgbe/ixgbevf implementation already. Or I'm reading the code
> > wrongly.
> > I'd like to clarify what is the issue of allowing to receive all multicast packets.
> 
> Allowing a VM to give itself the privilege of seeing every multicast packet
> could be seen as a hole in VM isolation.
> Now if the host system allows this policy I don't see this as an issue as
> someone specifically allowed this to happen and then must not be concerned.
> We could even log that it has occurred, which I believe your patch did do.
> The issue is also further muddied, as you mentioned above, since some of
> these multicast packets are leaking anyway (the HW currently uses a 12 bit mask).
> It's just that this change would greatly enlarge that hole from a fraction to
> all multicast packets.

Why does it have anything to do with VM isolation?
Isn't is just the same as if the VM were connected directly to the
ethernet cable?

	David


^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [E1000-devel] [PATCH 1/2] if_link: Add VF multicast promiscuous mode control
  2015-01-22  9:50                     ` David Laight
@ 2015-01-22 10:52                       ` Jeff Kirsher
  -1 siblings, 0 replies; 46+ messages in thread
From: Jeff Kirsher @ 2015-01-22 10:52 UTC (permalink / raw)
  To: David Laight
  Cc: 'Skidmore, Donald C',
	Hiroshi Shimamoto, Bjørn Mork, e1000-devel, netdev, Choi,
	Sy Jong, Hayato Momma, linux-kernel

[-- Attachment #1: Type: text/plain, Size: 2595 bytes --]

On Thu, 2015-01-22 at 09:50 +0000, David Laight wrote:
> From: Skidmore, Donald C 
> > > > From: Hiroshi Shimamoto
> > > > > My concern is what is the real issue that VF multicast
> promiscuous mode
> > > can cause.
> > > > > I think there is the 4k entries to filter multicast address,
> and the
> > > > > current ixgbe/ixgbevf can turn all bits on from VM. That is
> almost same as
> > > enabling multicast promiscuous mode.
> > > > > I mean that we can receive all multicast addresses by an
> onerous
> > > operation in untrusted VM.
> > > > > I think we should clarify what is real security issue in this
> context.
> > > >
> > > > If you are worried about passing un-enabled multicasts to users
> then
> > > > what about doing a software hash of received multicasts and
> checking
> > > > against an actual list of multicasts enabled for that hash
> entry.
> > > > Under normal conditions there is likely to be only a single
> address to check.
> > > >
> > > > It may (or may not) be best to use the same hash as any hashing
> > > > hardware filter uses.
> > >
> > > thanks for the comment. But I don't think that is the point.
> > >
> > > I guess, introducing VF multicast promiscuous mode seems to add
> new
> > > privilege to peek every multicast packet in VM and that doesn't
> look good.
> > > On the other hand, I think that there has been the same privilege
> in the
> > > current ixgbe/ixgbevf implementation already. Or I'm reading the
> code
> > > wrongly.
> > > I'd like to clarify what is the issue of allowing to receive all
> multicast packets.
> > 
> > Allowing a VM to give itself the privilege of seeing every multicast
> packet
> > could be seen as a hole in VM isolation.
> > Now if the host system allows this policy I don't see this as an
> issue as
> > someone specifically allowed this to happen and then must not be
> concerned.
> > We could even log that it has occurred, which I believe your patch
> did do.
> > The issue is also further muddied, as you mentioned above, since
> some of
> > these multicast packets are leaking anyway (the HW currently uses a
> 12 bit mask).
> > It's just that this change would greatly enlarge that hole from a
> fraction to
> > all multicast packets.
> 
> Why does it have anything to do with VM isolation?
> Isn't is just the same as if the VM were connected directly to the
> ethernet cable?

So give an example of when the VF driver is connected directly to the
ethernet cable and a PF driver (ixgbe) does not exist, at least that is
what you are suggesting.

[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH 1/2] if_link: Add VF multicast promiscuous mode control
@ 2015-01-22 10:52                       ` Jeff Kirsher
  0 siblings, 0 replies; 46+ messages in thread
From: Jeff Kirsher @ 2015-01-22 10:52 UTC (permalink / raw)
  To: David Laight
  Cc: e1000-devel, Choi, Sy Jong, linux-kernel, Hayato Momma, netdev,
	Hiroshi Shimamoto, Bjørn Mork


[-- Attachment #1.1: Type: text/plain, Size: 2595 bytes --]

On Thu, 2015-01-22 at 09:50 +0000, David Laight wrote:
> From: Skidmore, Donald C 
> > > > From: Hiroshi Shimamoto
> > > > > My concern is what is the real issue that VF multicast
> promiscuous mode
> > > can cause.
> > > > > I think there is the 4k entries to filter multicast address,
> and the
> > > > > current ixgbe/ixgbevf can turn all bits on from VM. That is
> almost same as
> > > enabling multicast promiscuous mode.
> > > > > I mean that we can receive all multicast addresses by an
> onerous
> > > operation in untrusted VM.
> > > > > I think we should clarify what is real security issue in this
> context.
> > > >
> > > > If you are worried about passing un-enabled multicasts to users
> then
> > > > what about doing a software hash of received multicasts and
> checking
> > > > against an actual list of multicasts enabled for that hash
> entry.
> > > > Under normal conditions there is likely to be only a single
> address to check.
> > > >
> > > > It may (or may not) be best to use the same hash as any hashing
> > > > hardware filter uses.
> > >
> > > thanks for the comment. But I don't think that is the point.
> > >
> > > I guess, introducing VF multicast promiscuous mode seems to add
> new
> > > privilege to peek every multicast packet in VM and that doesn't
> look good.
> > > On the other hand, I think that there has been the same privilege
> in the
> > > current ixgbe/ixgbevf implementation already. Or I'm reading the
> code
> > > wrongly.
> > > I'd like to clarify what is the issue of allowing to receive all
> multicast packets.
> > 
> > Allowing a VM to give itself the privilege of seeing every multicast
> packet
> > could be seen as a hole in VM isolation.
> > Now if the host system allows this policy I don't see this as an
> issue as
> > someone specifically allowed this to happen and then must not be
> concerned.
> > We could even log that it has occurred, which I believe your patch
> did do.
> > The issue is also further muddied, as you mentioned above, since
> some of
> > these multicast packets are leaking anyway (the HW currently uses a
> 12 bit mask).
> > It's just that this change would greatly enlarge that hole from a
> fraction to
> > all multicast packets.
> 
> Why does it have anything to do with VM isolation?
> Isn't is just the same as if the VM were connected directly to the
> ethernet cable?

So give an example of when the VF driver is connected directly to the
ethernet cable and a PF driver (ixgbe) does not exist, at least that is
what you are suggesting.

[-- Attachment #1.2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

[-- Attachment #2: Type: text/plain, Size: 392 bytes --]

------------------------------------------------------------------------------
New Year. New Location. New Benefits. New Data Center in Ashburn, VA.
GigeNET is offering a free month of service with a new server in Ashburn.
Choose from 2 high performing configs, both with 100TB of bandwidth.
Higher redundancy.Lower latency.Increased capacity.Completely compliant.
http://p.sf.net/sfu/gigenet

[-- Attachment #3: Type: text/plain, Size: 257 bytes --]

_______________________________________________
E1000-devel mailing list
E1000-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/e1000-devel
To learn more about Intel&#174; Ethernet, visit http://communities.intel.com/community/wired

^ permalink raw reply	[flat|nested] 46+ messages in thread

* RE: [E1000-devel] [PATCH 1/2] if_link: Add VF multicast promiscuous mode control
  2015-01-22  9:50                     ` David Laight
@ 2015-01-22 19:00                       ` Skidmore, Donald C
  -1 siblings, 0 replies; 46+ messages in thread
From: Skidmore, Donald C @ 2015-01-22 19:00 UTC (permalink / raw)
  To: David Laight, Hiroshi Shimamoto, Bjørn Mork
  Cc: e1000-devel, netdev, Choi, Sy Jong, linux-kernel, Hayato Momma

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: Type: text/plain; charset="utf-8", Size: 3715 bytes --]

> -----Original Message-----
> From: David Laight [mailto:David.Laight@ACULAB.COM]
> Sent: Thursday, January 22, 2015 1:50 AM
> To: Skidmore, Donald C; Hiroshi Shimamoto; Bjørn Mork
> Cc: e1000-devel@lists.sourceforge.net; netdev@vger.kernel.org; Choi, Sy
> Jong; linux-kernel@vger.kernel.org; Hayato Momma
> Subject: RE: [E1000-devel] [PATCH 1/2] if_link: Add VF multicast promiscuous
> mode control
> 
> From: Skidmore, Donald C
> > > > From: Hiroshi Shimamoto
> > > > > My concern is what is the real issue that VF multicast
> > > > > promiscuous mode
> > > can cause.
> > > > > I think there is the 4k entries to filter multicast address, and
> > > > > the current ixgbe/ixgbevf can turn all bits on from VM. That is
> > > > > almost same as
> > > enabling multicast promiscuous mode.
> > > > > I mean that we can receive all multicast addresses by an onerous
> > > operation in untrusted VM.
> > > > > I think we should clarify what is real security issue in this context.
> > > >
> > > > If you are worried about passing un-enabled multicasts to users
> > > > then what about doing a software hash of received multicasts and
> > > > checking against an actual list of multicasts enabled for that hash entry.
> > > > Under normal conditions there is likely to be only a single address to
> check.
> > > >
> > > > It may (or may not) be best to use the same hash as any hashing
> > > > hardware filter uses.
> > >
> > > thanks for the comment. But I don't think that is the point.
> > >
> > > I guess, introducing VF multicast promiscuous mode seems to add new
> > > privilege to peek every multicast packet in VM and that doesn't look
> good.
> > > On the other hand, I think that there has been the same privilege in
> > > the current ixgbe/ixgbevf implementation already. Or I'm reading the
> > > code wrongly.
> > > I'd like to clarify what is the issue of allowing to receive all multicast
> packets.
> >
> > Allowing a VM to give itself the privilege of seeing every multicast
> > packet could be seen as a hole in VM isolation.
> > Now if the host system allows this policy I don't see this as an issue
> > as someone specifically allowed this to happen and then must not be
> concerned.
> > We could even log that it has occurred, which I believe your patch did do.
> > The issue is also further muddied, as you mentioned above, since some
> > of these multicast packets are leaking anyway (the HW currently uses a 12
> bit mask).
> > It's just that this change would greatly enlarge that hole from a
> > fraction to all multicast packets.
> 
> Why does it have anything to do with VM isolation?
> Isn't is just the same as if the VM were connected directly to the ethernet
> cable?
> 
> 	David

I do see your point. :)

My hang up is more related to: without the nob to enable it (off by default) we are letting one VF dictate policy for all the other VFs and the PF.  If one VF needs to be in promiscuous multicast so is everyone else.  Their stacks now needs to deal with all the extra multicast packets.  As you point out this might not be a direct concern for isolation in that the VM could have 'chosen' to join any Multicast group and seen this traffic.  My concern over isolation is one VF has chosen that all the other VM now have to see this multicast traffic.

This will effect performance as these packets will need to be replicated.  So once again my issue is a VF is making the policy decision that doing this is ok for the entire system.  That should be the PF's job.

Does this give you a better idea where my concerns lay?


ÿôèº{.nÇ+‰·Ÿ®‰­†+%ŠËÿ±éݶ\x17¥Šwÿº{.nÇ+‰·¥Š{±þG«éÿŠ{ayº\x1dʇڙë,j\a­¢f£¢·hšïêÿ‘êçz_è®\x03(­éšŽŠÝ¢j"ú\x1a¶^[m§ÿÿ¾\a«þG«éÿ¢¸?™¨è­Ú&£ø§~á¶iO•æ¬z·švØ^\x14\x04\x1a¶^[m§ÿÿÃ\fÿ¶ìÿ¢¸?–I¥

^ permalink raw reply	[flat|nested] 46+ messages in thread

* RE: [E1000-devel] [PATCH 1/2] if_link: Add VF multicast promiscuous mode control
@ 2015-01-22 19:00                       ` Skidmore, Donald C
  0 siblings, 0 replies; 46+ messages in thread
From: Skidmore, Donald C @ 2015-01-22 19:00 UTC (permalink / raw)
  To: David Laight, Hiroshi Shimamoto, Bjørn Mork
  Cc: e1000-devel, netdev, Choi, Sy Jong, linux-kernel, Hayato Momma

> -----Original Message-----
> From: David Laight [mailto:David.Laight@ACULAB.COM]
> Sent: Thursday, January 22, 2015 1:50 AM
> To: Skidmore, Donald C; Hiroshi Shimamoto; Bjørn Mork
> Cc: e1000-devel@lists.sourceforge.net; netdev@vger.kernel.org; Choi, Sy
> Jong; linux-kernel@vger.kernel.org; Hayato Momma
> Subject: RE: [E1000-devel] [PATCH 1/2] if_link: Add VF multicast promiscuous
> mode control
> 
> From: Skidmore, Donald C
> > > > From: Hiroshi Shimamoto
> > > > > My concern is what is the real issue that VF multicast
> > > > > promiscuous mode
> > > can cause.
> > > > > I think there is the 4k entries to filter multicast address, and
> > > > > the current ixgbe/ixgbevf can turn all bits on from VM. That is
> > > > > almost same as
> > > enabling multicast promiscuous mode.
> > > > > I mean that we can receive all multicast addresses by an onerous
> > > operation in untrusted VM.
> > > > > I think we should clarify what is real security issue in this context.
> > > >
> > > > If you are worried about passing un-enabled multicasts to users
> > > > then what about doing a software hash of received multicasts and
> > > > checking against an actual list of multicasts enabled for that hash entry.
> > > > Under normal conditions there is likely to be only a single address to
> check.
> > > >
> > > > It may (or may not) be best to use the same hash as any hashing
> > > > hardware filter uses.
> > >
> > > thanks for the comment. But I don't think that is the point.
> > >
> > > I guess, introducing VF multicast promiscuous mode seems to add new
> > > privilege to peek every multicast packet in VM and that doesn't look
> good.
> > > On the other hand, I think that there has been the same privilege in
> > > the current ixgbe/ixgbevf implementation already. Or I'm reading the
> > > code wrongly.
> > > I'd like to clarify what is the issue of allowing to receive all multicast
> packets.
> >
> > Allowing a VM to give itself the privilege of seeing every multicast
> > packet could be seen as a hole in VM isolation.
> > Now if the host system allows this policy I don't see this as an issue
> > as someone specifically allowed this to happen and then must not be
> concerned.
> > We could even log that it has occurred, which I believe your patch did do.
> > The issue is also further muddied, as you mentioned above, since some
> > of these multicast packets are leaking anyway (the HW currently uses a 12
> bit mask).
> > It's just that this change would greatly enlarge that hole from a
> > fraction to all multicast packets.
> 
> Why does it have anything to do with VM isolation?
> Isn't is just the same as if the VM were connected directly to the ethernet
> cable?
> 
> 	David

I do see your point. :)

My hang up is more related to: without the nob to enable it (off by default) we are letting one VF dictate policy for all the other VFs and the PF.  If one VF needs to be in promiscuous multicast so is everyone else.  Their stacks now needs to deal with all the extra multicast packets.  As you point out this might not be a direct concern for isolation in that the VM could have 'chosen' to join any Multicast group and seen this traffic.  My concern over isolation is one VF has chosen that all the other VM now have to see this multicast traffic.

This will effect performance as these packets will need to be replicated.  So once again my issue is a VF is making the policy decision that doing this is ok for the entire system.  That should be the PF's job.

Does this give you a better idea where my concerns lay?



^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [E1000-devel] [PATCH 1/2] if_link: Add VF multicast promiscuous mode control
  2015-01-22 19:00                       ` Skidmore, Donald C
@ 2015-01-22 19:31                         ` Bjørn Mork
  -1 siblings, 0 replies; 46+ messages in thread
From: Bjørn Mork @ 2015-01-22 19:31 UTC (permalink / raw)
  To: Skidmore, Donald C
  Cc: David Laight, Hiroshi Shimamoto, e1000-devel, netdev, Choi,
	Sy Jong, linux-kernel, Hayato Momma

"Skidmore, Donald C" <donald.c.skidmore@intel.com> writes:

> My hang up is more related to: without the nob to enable it (off by
> default) we are letting one VF dictate policy for all the other VFs
> and the PF.  If one VF needs to be in promiscuous multicast so is
> everyone else.  Their stacks now needs to deal with all the extra
> multicast packets.  As you point out this might not be a direct
> concern for isolation in that the VM could have 'chosen' to join any
> Multicast group and seen this traffic.  My concern over isolation is
> one VF has chosen that all the other VM now have to see this multicast
> traffic.

Apologies if this question is stupid, but I just have to ask about stuff
I don't understand...

Looking at the proposed implementation, the promiscous multicast flag
seems to be a per-VF flag:

+int ixgbe_ndo_set_vf_mc_promisc(struct net_device *netdev, int vf, bool setting)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+	struct ixgbe_hw *hw = &adapter->hw;
+	u32 vmolr;
+
+	if (vf >= adapter->num_vfs)
+		return -EINVAL;
+
+	adapter->vfinfo[vf].mc_promisc_enabled = setting;
+
+	vmolr = IXGBE_READ_REG(hw, IXGBE_VMOLR(vf));
+	if (setting) {
+		e_info(drv, "VF %u: enabling multicast promiscuous\n", vf);
+		vmolr |= IXGBE_VMOLR_MPE;
+	} else {
+		e_info(drv, "VF %u: disabling multicast promiscuous\n", vf);
+		vmolr &= ~IXGBE_VMOLR_MPE;
+	}
+
+	IXGBE_WRITE_REG(hw, IXGBE_VMOLR(vf), vmolr);
+
+	return 0;
+}
+

I haven't read the data sheet, but I took a quick look at the excellent
high level driver docs:
http://www.intel.com/content/dam/doc/design-guide/82599-sr-iov-driver-companion-guide.pdf

It mentions "Multicast Promiscuous Enable" in its "Thoughts for
Customization" section:

 7.1 Multicast Promiscuous Enable

 The controller has provisions to allow each VF to be put into Multicast
 Promiscuous mode.  The Intel reference driver does not configure this
 option .

 The capability can be enabled/disabled by manipulating the MPE field
 (bit 28) of the PF VF L2 Control Register (PFVML2FLT – 0x0F000)

and showing a section from the data sheet describing the
"PF VM L2 Control Register - PFVML2FLT[n]  (0x0F000 + 4 * n, n=0...63; RW)"

To me it looks like enabling Promiscuos Multicast for a VF won't affect
any other VF at all.  Is this really not the case?



Bjørn

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [E1000-devel] [PATCH 1/2] if_link: Add VF multicast promiscuous mode control
@ 2015-01-22 19:31                         ` Bjørn Mork
  0 siblings, 0 replies; 46+ messages in thread
From: Bjørn Mork @ 2015-01-22 19:31 UTC (permalink / raw)
  To: Skidmore, Donald C
  Cc: David Laight, Hiroshi Shimamoto, e1000-devel, netdev, Choi,
	Sy Jong, linux-kernel, Hayato Momma

"Skidmore, Donald C" <donald.c.skidmore@intel.com> writes:

> My hang up is more related to: without the nob to enable it (off by
> default) we are letting one VF dictate policy for all the other VFs
> and the PF.  If one VF needs to be in promiscuous multicast so is
> everyone else.  Their stacks now needs to deal with all the extra
> multicast packets.  As you point out this might not be a direct
> concern for isolation in that the VM could have 'chosen' to join any
> Multicast group and seen this traffic.  My concern over isolation is
> one VF has chosen that all the other VM now have to see this multicast
> traffic.

Apologies if this question is stupid, but I just have to ask about stuff
I don't understand...

Looking at the proposed implementation, the promiscous multicast flag
seems to be a per-VF flag:

+int ixgbe_ndo_set_vf_mc_promisc(struct net_device *netdev, int vf, bool setting)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+	struct ixgbe_hw *hw = &adapter->hw;
+	u32 vmolr;
+
+	if (vf >= adapter->num_vfs)
+		return -EINVAL;
+
+	adapter->vfinfo[vf].mc_promisc_enabled = setting;
+
+	vmolr = IXGBE_READ_REG(hw, IXGBE_VMOLR(vf));
+	if (setting) {
+		e_info(drv, "VF %u: enabling multicast promiscuous\n", vf);
+		vmolr |= IXGBE_VMOLR_MPE;
+	} else {
+		e_info(drv, "VF %u: disabling multicast promiscuous\n", vf);
+		vmolr &= ~IXGBE_VMOLR_MPE;
+	}
+
+	IXGBE_WRITE_REG(hw, IXGBE_VMOLR(vf), vmolr);
+
+	return 0;
+}
+

I haven't read the data sheet, but I took a quick look at the excellent
high level driver docs:
http://www.intel.com/content/dam/doc/design-guide/82599-sr-iov-driver-companion-guide.pdf

It mentions "Multicast Promiscuous Enable" in its "Thoughts for
Customization" section:

 7.1 Multicast Promiscuous Enable

 The controller has provisions to allow each VF to be put into Multicast
 Promiscuous mode.  The Intel reference driver does not configure this
 option .

 The capability can be enabled/disabled by manipulating the MPE field
 (bit 28) of the PF VF L2 Control Register (PFVML2FLT – 0x0F000)

and showing a section from the data sheet describing the
"PF VM L2 Control Register - PFVML2FLT[n]  (0x0F000 + 4 * n, n=0...63; RW)"

To me it looks like enabling Promiscuos Multicast for a VF won't affect
any other VF at all.  Is this really not the case?



Bjørn

^ permalink raw reply	[flat|nested] 46+ messages in thread

* RE: [E1000-devel] [PATCH 1/2] if_link: Add VF multicast promiscuous mode control
  2015-01-22 19:31                         ` Bjørn Mork
@ 2015-01-22 21:34                           ` Skidmore, Donald C
  -1 siblings, 0 replies; 46+ messages in thread
From: Skidmore, Donald C @ 2015-01-22 21:34 UTC (permalink / raw)
  To: Bjørn Mork
  Cc: David Laight, Hiroshi Shimamoto, e1000-devel, netdev, Choi,
	Sy Jong, linux-kernel, Hayato Momma

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: Type: text/plain; charset="utf-8", Size: 4899 bytes --]



> -----Original Message-----
> From: Bjørn Mork [mailto:bjorn@mork.no]
> Sent: Thursday, January 22, 2015 11:32 AM
> To: Skidmore, Donald C
> Cc: David Laight; Hiroshi Shimamoto; e1000-devel@lists.sourceforge.net;
> netdev@vger.kernel.org; Choi, Sy Jong; linux-kernel@vger.kernel.org;
> Hayato Momma
> Subject: Re: [E1000-devel] [PATCH 1/2] if_link: Add VF multicast promiscuous
> mode control
> 
> "Skidmore, Donald C" <donald.c.skidmore@intel.com> writes:
> 
> > My hang up is more related to: without the nob to enable it (off by
> > default) we are letting one VF dictate policy for all the other VFs
> > and the PF.  If one VF needs to be in promiscuous multicast so is
> > everyone else.  Their stacks now needs to deal with all the extra
> > multicast packets.  As you point out this might not be a direct
> > concern for isolation in that the VM could have 'chosen' to join any
> > Multicast group and seen this traffic.  My concern over isolation is
> > one VF has chosen that all the other VM now have to see this multicast
> > traffic.
> 
> Apologies if this question is stupid, but I just have to ask about stuff I don't
> understand...
> 
> Looking at the proposed implementation, the promiscous multicast flag
> seems to be a per-VF flag:
> 
> +int ixgbe_ndo_set_vf_mc_promisc(struct net_device *netdev, int vf, bool
> +setting) {
> +	struct ixgbe_adapter *adapter = netdev_priv(netdev);
> +	struct ixgbe_hw *hw = &adapter->hw;
> +	u32 vmolr;
> +
> +	if (vf >= adapter->num_vfs)
> +		return -EINVAL;
> +
> +	adapter->vfinfo[vf].mc_promisc_enabled = setting;
> +
> +	vmolr = IXGBE_READ_REG(hw, IXGBE_VMOLR(vf));
> +	if (setting) {
> +		e_info(drv, "VF %u: enabling multicast promiscuous\n", vf);
> +		vmolr |= IXGBE_VMOLR_MPE;
> +	} else {
> +		e_info(drv, "VF %u: disabling multicast promiscuous\n", vf);
> +		vmolr &= ~IXGBE_VMOLR_MPE;
> +	}
> +
> +	IXGBE_WRITE_REG(hw, IXGBE_VMOLR(vf), vmolr);
> +
> +	return 0;
> +}
> +
> 
> I haven't read the data sheet, but I took a quick look at the excellent high
> level driver docs:
> http://www.intel.com/content/dam/doc/design-guide/82599-sr-iov-driver-
> companion-guide.pdf
> 
> It mentions "Multicast Promiscuous Enable" in its "Thoughts for
> Customization" section:
> 
>  7.1 Multicast Promiscuous Enable
> 
>  The controller has provisions to allow each VF to be put into Multicast
> Promiscuous mode.  The Intel reference driver does not configure this
> option .
> 
>  The capability can be enabled/disabled by manipulating the MPE field  (bit
> 28) of the PF VF L2 Control Register (PFVML2FLT – 0x0F000)
> 
> and showing a section from the data sheet describing the "PF VM L2 Control
> Register - PFVML2FLT[n]  (0x0F000 + 4 * n, n=0...63; RW)"
> 
> To me it looks like enabling Promiscuos Multicast for a VF won't affect any
> other VF at all.  Is this really not the case?
> 
> 
> 
> Bjørn

Clearly not a dumb question at all and I'm glad you mentioned that. :)  I was going off the assumption, been awhile since I read the patch, that the patch was using FCTRL.MPE or MANC.MCST_PASS_L2 which would turn multicast promiscuous on for everyone.  Since the patch is using PFVML2FLT.MPE this lessens my concern over effect on the entire system.

That said I still would prefer having a way to override this behavior on the PF, although I admit my argument is weaker.   I'm still concerned about a VF changing the behavior of the PF without any way to prevent it.  This might be one part philosophical (PF sets policy not the VF) but this still could have a noticeable effect on the overall system.  If any other VFs (or the PF) are receiving MC packets these will have to be replicated which will be a performance hit.  When we use the MC hash this is limited vs. when anyone is in MC promiscuous every MC packet used by another pool would be replicated.  I could imagine in some environments (i.e. public clouds) where you don't trust what is running in your VM you might what to block this from happening. 

In some ways it is almost the mirror image of the issue you brought up:

Adding a new hook for this seems over-complicated to me.  And it still
doesn't solve the real problems that
 a) the user has to know about this limit, and
 b) manually configure the feature

My reverse argument might be that if this happens automatically.  It might take the VM provider a long time to realize performance has taken a hit because some VM asked to join 31 multicast groups and entered MC promiscuous.  Then only to find that they have no way to block such behavior.

Maybe I wouldn't as concerned if the patch author could provide some performance results to show this won't have as a negative effect as I'm afraid it might?

-Don Skidmore <donald.c.skidmore@intel.com>
ÿôèº{.nÇ+‰·Ÿ®‰­†+%ŠËÿ±éݶ\x17¥Šwÿº{.nÇ+‰·¥Š{±þG«éÿŠ{ayº\x1dʇڙë,j\a­¢f£¢·hšïêÿ‘êçz_è®\x03(­éšŽŠÝ¢j"ú\x1a¶^[m§ÿÿ¾\a«þG«éÿ¢¸?™¨è­Ú&£ø§~á¶iO•æ¬z·švØ^\x14\x04\x1a¶^[m§ÿÿÃ\fÿ¶ìÿ¢¸?–I¥

^ permalink raw reply	[flat|nested] 46+ messages in thread

* RE: [E1000-devel] [PATCH 1/2] if_link: Add VF multicast promiscuous mode control
@ 2015-01-22 21:34                           ` Skidmore, Donald C
  0 siblings, 0 replies; 46+ messages in thread
From: Skidmore, Donald C @ 2015-01-22 21:34 UTC (permalink / raw)
  To: Bjørn Mork
  Cc: David Laight, Hiroshi Shimamoto, e1000-devel, netdev, Choi,
	Sy Jong, linux-kernel, Hayato Momma



> -----Original Message-----
> From: Bjørn Mork [mailto:bjorn@mork.no]
> Sent: Thursday, January 22, 2015 11:32 AM
> To: Skidmore, Donald C
> Cc: David Laight; Hiroshi Shimamoto; e1000-devel@lists.sourceforge.net;
> netdev@vger.kernel.org; Choi, Sy Jong; linux-kernel@vger.kernel.org;
> Hayato Momma
> Subject: Re: [E1000-devel] [PATCH 1/2] if_link: Add VF multicast promiscuous
> mode control
> 
> "Skidmore, Donald C" <donald.c.skidmore@intel.com> writes:
> 
> > My hang up is more related to: without the nob to enable it (off by
> > default) we are letting one VF dictate policy for all the other VFs
> > and the PF.  If one VF needs to be in promiscuous multicast so is
> > everyone else.  Their stacks now needs to deal with all the extra
> > multicast packets.  As you point out this might not be a direct
> > concern for isolation in that the VM could have 'chosen' to join any
> > Multicast group and seen this traffic.  My concern over isolation is
> > one VF has chosen that all the other VM now have to see this multicast
> > traffic.
> 
> Apologies if this question is stupid, but I just have to ask about stuff I don't
> understand...
> 
> Looking at the proposed implementation, the promiscous multicast flag
> seems to be a per-VF flag:
> 
> +int ixgbe_ndo_set_vf_mc_promisc(struct net_device *netdev, int vf, bool
> +setting) {
> +	struct ixgbe_adapter *adapter = netdev_priv(netdev);
> +	struct ixgbe_hw *hw = &adapter->hw;
> +	u32 vmolr;
> +
> +	if (vf >= adapter->num_vfs)
> +		return -EINVAL;
> +
> +	adapter->vfinfo[vf].mc_promisc_enabled = setting;
> +
> +	vmolr = IXGBE_READ_REG(hw, IXGBE_VMOLR(vf));
> +	if (setting) {
> +		e_info(drv, "VF %u: enabling multicast promiscuous\n", vf);
> +		vmolr |= IXGBE_VMOLR_MPE;
> +	} else {
> +		e_info(drv, "VF %u: disabling multicast promiscuous\n", vf);
> +		vmolr &= ~IXGBE_VMOLR_MPE;
> +	}
> +
> +	IXGBE_WRITE_REG(hw, IXGBE_VMOLR(vf), vmolr);
> +
> +	return 0;
> +}
> +
> 
> I haven't read the data sheet, but I took a quick look at the excellent high
> level driver docs:
> http://www.intel.com/content/dam/doc/design-guide/82599-sr-iov-driver-
> companion-guide.pdf
> 
> It mentions "Multicast Promiscuous Enable" in its "Thoughts for
> Customization" section:
> 
>  7.1 Multicast Promiscuous Enable
> 
>  The controller has provisions to allow each VF to be put into Multicast
> Promiscuous mode.  The Intel reference driver does not configure this
> option .
> 
>  The capability can be enabled/disabled by manipulating the MPE field  (bit
> 28) of the PF VF L2 Control Register (PFVML2FLT – 0x0F000)
> 
> and showing a section from the data sheet describing the "PF VM L2 Control
> Register - PFVML2FLT[n]  (0x0F000 + 4 * n, n=0...63; RW)"
> 
> To me it looks like enabling Promiscuos Multicast for a VF won't affect any
> other VF at all.  Is this really not the case?
> 
> 
> 
> Bjørn

Clearly not a dumb question at all and I'm glad you mentioned that. :)  I was going off the assumption, been awhile since I read the patch, that the patch was using FCTRL.MPE or MANC.MCST_PASS_L2 which would turn multicast promiscuous on for everyone.  Since the patch is using PFVML2FLT.MPE this lessens my concern over effect on the entire system.

That said I still would prefer having a way to override this behavior on the PF, although I admit my argument is weaker.   I'm still concerned about a VF changing the behavior of the PF without any way to prevent it.  This might be one part philosophical (PF sets policy not the VF) but this still could have a noticeable effect on the overall system.  If any other VFs (or the PF) are receiving MC packets these will have to be replicated which will be a performance hit.  When we use the MC hash this is limited vs. when anyone is in MC promiscuous every MC packet used by another pool would be replicated.  I could imagine in some environments (i.e. public clouds) where you don't trust what is running in your VM you might what to block this from happening. 

In some ways it is almost the mirror image of the issue you brought up:

Adding a new hook for this seems over-complicated to me.  And it still
doesn't solve the real problems that
 a) the user has to know about this limit, and
 b) manually configure the feature

My reverse argument might be that if this happens automatically.  It might take the VM provider a long time to realize performance has taken a hit because some VM asked to join 31 multicast groups and entered MC promiscuous.  Then only to find that they have no way to block such behavior.

Maybe I wouldn't as concerned if the patch author could provide some performance results to show this won't have as a negative effect as I'm afraid it might?

-Don Skidmore <donald.c.skidmore@intel.com>

^ permalink raw reply	[flat|nested] 46+ messages in thread

* RE: [E1000-devel] [PATCH 1/2] if_link: Add VF multicast promiscuous mode control
  2015-01-22 21:34                           ` Skidmore, Donald C
@ 2015-01-23  0:32                             ` Hiroshi Shimamoto
  -1 siblings, 0 replies; 46+ messages in thread
From: Hiroshi Shimamoto @ 2015-01-23  0:32 UTC (permalink / raw)
  To: Skidmore, Donald C, Bjørn Mork
  Cc: David Laight, e1000-devel, netdev, Choi, Sy Jong, linux-kernel,
	Hayato Momma

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: Type: text/plain; charset="utf-8", Size: 5661 bytes --]

> Subject: RE: [E1000-devel] [PATCH 1/2] if_link: Add VF multicast promiscuous mode control
> >
> > "Skidmore, Donald C" <donald.c.skidmore@intel.com> writes:
> >
> > > My hang up is more related to: without the nob to enable it (off by
> > > default) we are letting one VF dictate policy for all the other VFs
> > > and the PF.  If one VF needs to be in promiscuous multicast so is
> > > everyone else.  Their stacks now needs to deal with all the extra
> > > multicast packets.  As you point out this might not be a direct
> > > concern for isolation in that the VM could have 'chosen' to join any
> > > Multicast group and seen this traffic.  My concern over isolation is
> > > one VF has chosen that all the other VM now have to see this multicast
> > > traffic.
> >
> > Apologies if this question is stupid, but I just have to ask about stuff I don't
> > understand...
> >
> > Looking at the proposed implementation, the promiscous multicast flag
> > seems to be a per-VF flag:
> >
> > +int ixgbe_ndo_set_vf_mc_promisc(struct net_device *netdev, int vf, bool
> > +setting) {
> > +	struct ixgbe_adapter *adapter = netdev_priv(netdev);
> > +	struct ixgbe_hw *hw = &adapter->hw;
> > +	u32 vmolr;
> > +
> > +	if (vf >= adapter->num_vfs)
> > +		return -EINVAL;
> > +
> > +	adapter->vfinfo[vf].mc_promisc_enabled = setting;
> > +
> > +	vmolr = IXGBE_READ_REG(hw, IXGBE_VMOLR(vf));
> > +	if (setting) {
> > +		e_info(drv, "VF %u: enabling multicast promiscuous\n", vf);
> > +		vmolr |= IXGBE_VMOLR_MPE;
> > +	} else {
> > +		e_info(drv, "VF %u: disabling multicast promiscuous\n", vf);
> > +		vmolr &= ~IXGBE_VMOLR_MPE;
> > +	}
> > +
> > +	IXGBE_WRITE_REG(hw, IXGBE_VMOLR(vf), vmolr);
> > +
> > +	return 0;
> > +}
> > +
> >
> > I haven't read the data sheet, but I took a quick look at the excellent high
> > level driver docs:
> > http://www.intel.com/content/dam/doc/design-guide/82599-sr-iov-driver-
> > companion-guide.pdf
> >
> > It mentions "Multicast Promiscuous Enable" in its "Thoughts for
> > Customization" section:
> >
> >  7.1 Multicast Promiscuous Enable
> >
> >  The controller has provisions to allow each VF to be put into Multicast
> > Promiscuous mode.  The Intel reference driver does not configure this
> > option .
> >
> >  The capability can be enabled/disabled by manipulating the MPE field  (bit
> > 28) of the PF VF L2 Control Register (PFVML2FLT – 0x0F000)
> >
> > and showing a section from the data sheet describing the "PF VM L2 Control
> > Register - PFVML2FLT[n]  (0x0F000 + 4 * n, n=0...63; RW)"
> >
> > To me it looks like enabling Promiscuos Multicast for a VF won't affect any
> > other VF at all.  Is this really not the case?
> >
> >
> >
> > Bjørn
> 
> Clearly not a dumb question at all and I'm glad you mentioned that. :)  I was going off the assumption, been awhile since
> I read the patch, that the patch was using FCTRL.MPE or MANC.MCST_PASS_L2 which would turn multicast promiscuous on for
> everyone.  Since the patch is using PFVML2FLT.MPE this lessens my concern over effect on the entire system.

I believe the patches for this VF multicast promiscuous mode is per VF.

> 
> That said I still would prefer having a way to override this behavior on the PF, although I admit my argument is weaker.
> I'm still concerned about a VF changing the behavior of the PF without any way to prevent it.  This might be one part
> philosophical (PF sets policy not the VF) but this still could have a noticeable effect on the overall system.  If any
> other VFs (or the PF) are receiving MC packets these will have to be replicated which will be a performance hit.  When
> we use the MC hash this is limited vs. when anyone is in MC promiscuous every MC packet used by another pool would be
> replicated.  I could imagine in some environments (i.e. public clouds) where you don't trust what is running in your VM
> you might what to block this from happening.

I understand your request and I'm thinking to submit the patches
  1) Add new mbox API between ixgbe/ixgbevf to turn MC promiscuous on,
     and enables it when ixgbevf needs over 30 MC addresses.
  2) Add a policy knob to prevent enabling it from the PF.

Does it seem okay?

BTW, I'm bit worried about to use ndo interface for 2) because adding a
new hook makes core code complicated.
Is it really reasonable to do it with ndo?
I haven't find any other suitable method to do it, right now. And using
ndo VF hook looks standard way to control VF functionality.
Then, I think it's the best way to implement this policy in ndo hook.

> 
> In some ways it is almost the mirror image of the issue you brought up:
> 
> Adding a new hook for this seems over-complicated to me.  And it still
> doesn't solve the real problems that
>  a) the user has to know about this limit, and
>  b) manually configure the feature
> 
> My reverse argument might be that if this happens automatically.  It might take the VM provider a long time to realize
> performance has taken a hit because some VM asked to join 31 multicast groups and entered MC promiscuous.  Then only to
> find that they have no way to block such behavior.
> 
> Maybe I wouldn't as concerned if the patch author could provide some performance results to show this won't have as a
> negative effect as I'm afraid it might?

Hm, what kind of test case would you like to have?
The user A who has 30 MC addresses vs the user B who has 31 MC addresses, which
means that MC promiscuous mode, and coming MC packets the user doesn't expect?

thanks,
Hiroshi
ÿôèº{.nÇ+‰·Ÿ®‰­†+%ŠËÿ±éݶ\x17¥Šwÿº{.nÇ+‰·¥Š{±þG«éÿŠ{ayº\x1dʇڙë,j\a­¢f£¢·hšïêÿ‘êçz_è®\x03(­éšŽŠÝ¢j"ú\x1a¶^[m§ÿÿ¾\a«þG«éÿ¢¸?™¨è­Ú&£ø§~á¶iO•æ¬z·švØ^\x14\x04\x1a¶^[m§ÿÿÃ\fÿ¶ìÿ¢¸?–I¥

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH 1/2] if_link: Add VF multicast promiscuous mode control
@ 2015-01-23  0:32                             ` Hiroshi Shimamoto
  0 siblings, 0 replies; 46+ messages in thread
From: Hiroshi Shimamoto @ 2015-01-23  0:32 UTC (permalink / raw)
  To: Skidmore, Donald C, Bjørn Mork
  Cc: e1000-devel, netdev, Choi, Sy Jong, linux-kernel, David Laight,
	Hayato Momma

> Subject: RE: [E1000-devel] [PATCH 1/2] if_link: Add VF multicast promiscuous mode control
> >
> > "Skidmore, Donald C" <donald.c.skidmore@intel.com> writes:
> >
> > > My hang up is more related to: without the nob to enable it (off by
> > > default) we are letting one VF dictate policy for all the other VFs
> > > and the PF.  If one VF needs to be in promiscuous multicast so is
> > > everyone else.  Their stacks now needs to deal with all the extra
> > > multicast packets.  As you point out this might not be a direct
> > > concern for isolation in that the VM could have 'chosen' to join any
> > > Multicast group and seen this traffic.  My concern over isolation is
> > > one VF has chosen that all the other VM now have to see this multicast
> > > traffic.
> >
> > Apologies if this question is stupid, but I just have to ask about stuff I don't
> > understand...
> >
> > Looking at the proposed implementation, the promiscous multicast flag
> > seems to be a per-VF flag:
> >
> > +int ixgbe_ndo_set_vf_mc_promisc(struct net_device *netdev, int vf, bool
> > +setting) {
> > +	struct ixgbe_adapter *adapter = netdev_priv(netdev);
> > +	struct ixgbe_hw *hw = &adapter->hw;
> > +	u32 vmolr;
> > +
> > +	if (vf >= adapter->num_vfs)
> > +		return -EINVAL;
> > +
> > +	adapter->vfinfo[vf].mc_promisc_enabled = setting;
> > +
> > +	vmolr = IXGBE_READ_REG(hw, IXGBE_VMOLR(vf));
> > +	if (setting) {
> > +		e_info(drv, "VF %u: enabling multicast promiscuous\n", vf);
> > +		vmolr |= IXGBE_VMOLR_MPE;
> > +	} else {
> > +		e_info(drv, "VF %u: disabling multicast promiscuous\n", vf);
> > +		vmolr &= ~IXGBE_VMOLR_MPE;
> > +	}
> > +
> > +	IXGBE_WRITE_REG(hw, IXGBE_VMOLR(vf), vmolr);
> > +
> > +	return 0;
> > +}
> > +
> >
> > I haven't read the data sheet, but I took a quick look at the excellent high
> > level driver docs:
> > http://www.intel.com/content/dam/doc/design-guide/82599-sr-iov-driver-
> > companion-guide.pdf
> >
> > It mentions "Multicast Promiscuous Enable" in its "Thoughts for
> > Customization" section:
> >
> >  7.1 Multicast Promiscuous Enable
> >
> >  The controller has provisions to allow each VF to be put into Multicast
> > Promiscuous mode.  The Intel reference driver does not configure this
> > option .
> >
> >  The capability can be enabled/disabled by manipulating the MPE field  (bit
> > 28) of the PF VF L2 Control Register (PFVML2FLT – 0x0F000)
> >
> > and showing a section from the data sheet describing the "PF VM L2 Control
> > Register - PFVML2FLT[n]  (0x0F000 + 4 * n, n=0...63; RW)"
> >
> > To me it looks like enabling Promiscuos Multicast for a VF won't affect any
> > other VF at all.  Is this really not the case?
> >
> >
> >
> > Bjørn
> 
> Clearly not a dumb question at all and I'm glad you mentioned that. :)  I was going off the assumption, been awhile since
> I read the patch, that the patch was using FCTRL.MPE or MANC.MCST_PASS_L2 which would turn multicast promiscuous on for
> everyone.  Since the patch is using PFVML2FLT.MPE this lessens my concern over effect on the entire system.

I believe the patches for this VF multicast promiscuous mode is per VF.

> 
> That said I still would prefer having a way to override this behavior on the PF, although I admit my argument is weaker.
> I'm still concerned about a VF changing the behavior of the PF without any way to prevent it.  This might be one part
> philosophical (PF sets policy not the VF) but this still could have a noticeable effect on the overall system.  If any
> other VFs (or the PF) are receiving MC packets these will have to be replicated which will be a performance hit.  When
> we use the MC hash this is limited vs. when anyone is in MC promiscuous every MC packet used by another pool would be
> replicated.  I could imagine in some environments (i.e. public clouds) where you don't trust what is running in your VM
> you might what to block this from happening.

I understand your request and I'm thinking to submit the patches
  1) Add new mbox API between ixgbe/ixgbevf to turn MC promiscuous on,
     and enables it when ixgbevf needs over 30 MC addresses.
  2) Add a policy knob to prevent enabling it from the PF.

Does it seem okay?

BTW, I'm bit worried about to use ndo interface for 2) because adding a
new hook makes core code complicated.
Is it really reasonable to do it with ndo?
I haven't find any other suitable method to do it, right now. And using
ndo VF hook looks standard way to control VF functionality.
Then, I think it's the best way to implement this policy in ndo hook.

> 
> In some ways it is almost the mirror image of the issue you brought up:
> 
> Adding a new hook for this seems over-complicated to me.  And it still
> doesn't solve the real problems that
>  a) the user has to know about this limit, and
>  b) manually configure the feature
> 
> My reverse argument might be that if this happens automatically.  It might take the VM provider a long time to realize
> performance has taken a hit because some VM asked to join 31 multicast groups and entered MC promiscuous.  Then only to
> find that they have no way to block such behavior.
> 
> Maybe I wouldn't as concerned if the patch author could provide some performance results to show this won't have as a
> negative effect as I'm afraid it might?

Hm, what kind of test case would you like to have?
The user A who has 30 MC addresses vs the user B who has 31 MC addresses, which
means that MC promiscuous mode, and coming MC packets the user doesn't expect?

thanks,
Hiroshi
------------------------------------------------------------------------------
New Year. New Location. New Benefits. New Data Center in Ashburn, VA.
GigeNET is offering a free month of service with a new server in Ashburn.
Choose from 2 high performing configs, both with 100TB of bandwidth.
Higher redundancy.Lower latency.Increased capacity.Completely compliant.
http://p.sf.net/sfu/gigenet
_______________________________________________
E1000-devel mailing list
E1000-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/e1000-devel
To learn more about Intel&#174; Ethernet, visit http://communities.intel.com/community/wired

^ permalink raw reply	[flat|nested] 46+ messages in thread

* RE: [E1000-devel] [PATCH 1/2] if_link: Add VF multicast promiscuous mode control
  2015-01-23  0:32                             ` Hiroshi Shimamoto
@ 2015-01-23  1:21                               ` Skidmore, Donald C
  -1 siblings, 0 replies; 46+ messages in thread
From: Skidmore, Donald C @ 2015-01-23  1:21 UTC (permalink / raw)
  To: Hiroshi Shimamoto, Bjørn Mork
  Cc: David Laight, e1000-devel, netdev, Choi, Sy Jong, linux-kernel,
	Hayato Momma

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: Type: text/plain; charset="utf-8", Size: 7240 bytes --]



> -----Original Message-----
> From: Hiroshi Shimamoto [mailto:h-shimamoto@ct.jp.nec.com]
> Sent: Thursday, January 22, 2015 4:32 PM
> To: Skidmore, Donald C; Bjørn Mork
> Cc: David Laight; e1000-devel@lists.sourceforge.net;
> netdev@vger.kernel.org; Choi, Sy Jong; linux-kernel@vger.kernel.org;
> Hayato Momma
> Subject: RE: [E1000-devel] [PATCH 1/2] if_link: Add VF multicast promiscuous
> mode control
> 
> > Subject: RE: [E1000-devel] [PATCH 1/2] if_link: Add VF multicast
> > promiscuous mode control
> > >
> > > "Skidmore, Donald C" <donald.c.skidmore@intel.com> writes:
> > >
> > > > My hang up is more related to: without the nob to enable it (off
> > > > by
> > > > default) we are letting one VF dictate policy for all the other
> > > > VFs and the PF.  If one VF needs to be in promiscuous multicast so
> > > > is everyone else.  Their stacks now needs to deal with all the
> > > > extra multicast packets.  As you point out this might not be a
> > > > direct concern for isolation in that the VM could have 'chosen' to
> > > > join any Multicast group and seen this traffic.  My concern over
> > > > isolation is one VF has chosen that all the other VM now have to
> > > > see this multicast traffic.
> > >
> > > Apologies if this question is stupid, but I just have to ask about
> > > stuff I don't understand...
> > >
> > > Looking at the proposed implementation, the promiscous multicast
> > > flag seems to be a per-VF flag:
> > >
> > > +int ixgbe_ndo_set_vf_mc_promisc(struct net_device *netdev, int vf,
> > > +bool
> > > +setting) {
> > > +	struct ixgbe_adapter *adapter = netdev_priv(netdev);
> > > +	struct ixgbe_hw *hw = &adapter->hw;
> > > +	u32 vmolr;
> > > +
> > > +	if (vf >= adapter->num_vfs)
> > > +		return -EINVAL;
> > > +
> > > +	adapter->vfinfo[vf].mc_promisc_enabled = setting;
> > > +
> > > +	vmolr = IXGBE_READ_REG(hw, IXGBE_VMOLR(vf));
> > > +	if (setting) {
> > > +		e_info(drv, "VF %u: enabling multicast promiscuous\n", vf);
> > > +		vmolr |= IXGBE_VMOLR_MPE;
> > > +	} else {
> > > +		e_info(drv, "VF %u: disabling multicast promiscuous\n", vf);
> > > +		vmolr &= ~IXGBE_VMOLR_MPE;
> > > +	}
> > > +
> > > +	IXGBE_WRITE_REG(hw, IXGBE_VMOLR(vf), vmolr);
> > > +
> > > +	return 0;
> > > +}
> > > +
> > >
> > > I haven't read the data sheet, but I took a quick look at the
> > > excellent high level driver docs:
> > > http://www.intel.com/content/dam/doc/design-guide/82599-sr-iov-
> drive
> > > r-
> > > companion-guide.pdf
> > >
> > > It mentions "Multicast Promiscuous Enable" in its "Thoughts for
> > > Customization" section:
> > >
> > >  7.1 Multicast Promiscuous Enable
> > >
> > >  The controller has provisions to allow each VF to be put into
> > > Multicast Promiscuous mode.  The Intel reference driver does not
> > > configure this option .
> > >
> > >  The capability can be enabled/disabled by manipulating the MPE
> > > field  (bit
> > > 28) of the PF VF L2 Control Register (PFVML2FLT – 0x0F000)
> > >
> > > and showing a section from the data sheet describing the "PF VM L2
> > > Control Register - PFVML2FLT[n]  (0x0F000 + 4 * n, n=0...63; RW)"
> > >
> > > To me it looks like enabling Promiscuos Multicast for a VF won't
> > > affect any other VF at all.  Is this really not the case?
> > >
> > >
> > >
> > > Bjørn
> >
> > Clearly not a dumb question at all and I'm glad you mentioned that. :)
> > I was going off the assumption, been awhile since I read the patch,
> > that the patch was using FCTRL.MPE or MANC.MCST_PASS_L2 which would
> turn multicast promiscuous on for everyone.  Since the patch is using
> PFVML2FLT.MPE this lessens my concern over effect on the entire system.
> 
> I believe the patches for this VF multicast promiscuous mode is per VF.
> 
> >
> > That said I still would prefer having a way to override this behavior on the
> PF, although I admit my argument is weaker.
> > I'm still concerned about a VF changing the behavior of the PF without
> > any way to prevent it.  This might be one part philosophical (PF sets
> > policy not the VF) but this still could have a noticeable effect on
> > the overall system.  If any other VFs (or the PF) are receiving MC
> > packets these will have to be replicated which will be a performance
> > hit.  When we use the MC hash this is limited vs. when anyone is in MC
> promiscuous every MC packet used by another pool would be replicated.  I
> could imagine in some environments (i.e. public clouds) where you don't
> trust what is running in your VM you might what to block this from
> happening.
> 
> I understand your request and I'm thinking to submit the patches
>   1) Add new mbox API between ixgbe/ixgbevf to turn MC promiscuous on,
>      and enables it when ixgbevf needs over 30 MC addresses.
>   2) Add a policy knob to prevent enabling it from the PF.
> 
> Does it seem okay?

This sounds totally fine to me.  That way users that need this functionality can get it while anyone concerned about side effects don't enable it.

> 
> BTW, I'm bit worried about to use ndo interface for 2) because adding a new
> hook makes core code complicated.
> Is it really reasonable to do it with ndo?
> I haven't find any other suitable method to do it, right now. And using ndo VF
> hook looks standard way to control VF functionality.

To be honest I had been thinking it would be enabled by PF.  Basically a hook saying the PF was willing to let any VF's go into MC promiscuous and thus so not bothering to brake it out by VF.  Since the advice effects I was worried about would be felt on all the VF's and PF even if it was enabled on only some VF's.   I imagine there would be some benefit to being able to control how many VF's were able to enter this mode but I would be ok with either.

> Then, I think it's the best way to implement this policy in ndo hook.

That seems reasonable to me.  If not I'm not sure what else would be any better, maybe ethtool --set-priv-flags but that wouldn't make much sense if you set it by VF.  Likewise I'm not sure how excited people would be about including policy like this in ethtool.

> 
> >
> > In some ways it is almost the mirror image of the issue you brought up:
> >
> > Adding a new hook for this seems over-complicated to me.  And it still
> > doesn't solve the real problems that
> >  a) the user has to know about this limit, and
> >  b) manually configure the feature
> >
> > My reverse argument might be that if this happens automatically.  It
> > might take the VM provider a long time to realize performance has
> > taken a hit because some VM asked to join 31 multicast groups and
> entered MC promiscuous.  Then only to find that they have no way to block
> such behavior.
> >
> > Maybe I wouldn't as concerned if the patch author could provide some
> > performance results to show this won't have as a negative effect as I'm
> afraid it might?
> 
> Hm, what kind of test case would you like to have?
> The user A who has 30 MC addresses vs the user B who has 31 MC addresses,
> which means that MC promiscuous mode, and coming MC packets the user
> doesn't expect?
> 
> thanks,
> Hiroshi
ÿôèº{.nÇ+‰·Ÿ®‰­†+%ŠËÿ±éݶ\x17¥Šwÿº{.nÇ+‰·¥Š{±þG«éÿŠ{ayº\x1dʇڙë,j\a­¢f£¢·hšïêÿ‘êçz_è®\x03(­éšŽŠÝ¢j"ú\x1a¶^[m§ÿÿ¾\a«þG«éÿ¢¸?™¨è­Ú&£ø§~á¶iO•æ¬z·švØ^\x14\x04\x1a¶^[m§ÿÿÃ\fÿ¶ìÿ¢¸?–I¥

^ permalink raw reply	[flat|nested] 46+ messages in thread

* RE: [E1000-devel] [PATCH 1/2] if_link: Add VF multicast promiscuous mode control
@ 2015-01-23  1:21                               ` Skidmore, Donald C
  0 siblings, 0 replies; 46+ messages in thread
From: Skidmore, Donald C @ 2015-01-23  1:21 UTC (permalink / raw)
  To: Hiroshi Shimamoto, Bjørn Mork
  Cc: David Laight, e1000-devel, netdev, Choi, Sy Jong, linux-kernel,
	Hayato Momma



> -----Original Message-----
> From: Hiroshi Shimamoto [mailto:h-shimamoto@ct.jp.nec.com]
> Sent: Thursday, January 22, 2015 4:32 PM
> To: Skidmore, Donald C; Bjørn Mork
> Cc: David Laight; e1000-devel@lists.sourceforge.net;
> netdev@vger.kernel.org; Choi, Sy Jong; linux-kernel@vger.kernel.org;
> Hayato Momma
> Subject: RE: [E1000-devel] [PATCH 1/2] if_link: Add VF multicast promiscuous
> mode control
> 
> > Subject: RE: [E1000-devel] [PATCH 1/2] if_link: Add VF multicast
> > promiscuous mode control
> > >
> > > "Skidmore, Donald C" <donald.c.skidmore@intel.com> writes:
> > >
> > > > My hang up is more related to: without the nob to enable it (off
> > > > by
> > > > default) we are letting one VF dictate policy for all the other
> > > > VFs and the PF.  If one VF needs to be in promiscuous multicast so
> > > > is everyone else.  Their stacks now needs to deal with all the
> > > > extra multicast packets.  As you point out this might not be a
> > > > direct concern for isolation in that the VM could have 'chosen' to
> > > > join any Multicast group and seen this traffic.  My concern over
> > > > isolation is one VF has chosen that all the other VM now have to
> > > > see this multicast traffic.
> > >
> > > Apologies if this question is stupid, but I just have to ask about
> > > stuff I don't understand...
> > >
> > > Looking at the proposed implementation, the promiscous multicast
> > > flag seems to be a per-VF flag:
> > >
> > > +int ixgbe_ndo_set_vf_mc_promisc(struct net_device *netdev, int vf,
> > > +bool
> > > +setting) {
> > > +	struct ixgbe_adapter *adapter = netdev_priv(netdev);
> > > +	struct ixgbe_hw *hw = &adapter->hw;
> > > +	u32 vmolr;
> > > +
> > > +	if (vf >= adapter->num_vfs)
> > > +		return -EINVAL;
> > > +
> > > +	adapter->vfinfo[vf].mc_promisc_enabled = setting;
> > > +
> > > +	vmolr = IXGBE_READ_REG(hw, IXGBE_VMOLR(vf));
> > > +	if (setting) {
> > > +		e_info(drv, "VF %u: enabling multicast promiscuous\n", vf);
> > > +		vmolr |= IXGBE_VMOLR_MPE;
> > > +	} else {
> > > +		e_info(drv, "VF %u: disabling multicast promiscuous\n", vf);
> > > +		vmolr &= ~IXGBE_VMOLR_MPE;
> > > +	}
> > > +
> > > +	IXGBE_WRITE_REG(hw, IXGBE_VMOLR(vf), vmolr);
> > > +
> > > +	return 0;
> > > +}
> > > +
> > >
> > > I haven't read the data sheet, but I took a quick look at the
> > > excellent high level driver docs:
> > > http://www.intel.com/content/dam/doc/design-guide/82599-sr-iov-
> drive
> > > r-
> > > companion-guide.pdf
> > >
> > > It mentions "Multicast Promiscuous Enable" in its "Thoughts for
> > > Customization" section:
> > >
> > >  7.1 Multicast Promiscuous Enable
> > >
> > >  The controller has provisions to allow each VF to be put into
> > > Multicast Promiscuous mode.  The Intel reference driver does not
> > > configure this option .
> > >
> > >  The capability can be enabled/disabled by manipulating the MPE
> > > field  (bit
> > > 28) of the PF VF L2 Control Register (PFVML2FLT – 0x0F000)
> > >
> > > and showing a section from the data sheet describing the "PF VM L2
> > > Control Register - PFVML2FLT[n]  (0x0F000 + 4 * n, n=0...63; RW)"
> > >
> > > To me it looks like enabling Promiscuos Multicast for a VF won't
> > > affect any other VF at all.  Is this really not the case?
> > >
> > >
> > >
> > > Bjørn
> >
> > Clearly not a dumb question at all and I'm glad you mentioned that. :)
> > I was going off the assumption, been awhile since I read the patch,
> > that the patch was using FCTRL.MPE or MANC.MCST_PASS_L2 which would
> turn multicast promiscuous on for everyone.  Since the patch is using
> PFVML2FLT.MPE this lessens my concern over effect on the entire system.
> 
> I believe the patches for this VF multicast promiscuous mode is per VF.
> 
> >
> > That said I still would prefer having a way to override this behavior on the
> PF, although I admit my argument is weaker.
> > I'm still concerned about a VF changing the behavior of the PF without
> > any way to prevent it.  This might be one part philosophical (PF sets
> > policy not the VF) but this still could have a noticeable effect on
> > the overall system.  If any other VFs (or the PF) are receiving MC
> > packets these will have to be replicated which will be a performance
> > hit.  When we use the MC hash this is limited vs. when anyone is in MC
> promiscuous every MC packet used by another pool would be replicated.  I
> could imagine in some environments (i.e. public clouds) where you don't
> trust what is running in your VM you might what to block this from
> happening.
> 
> I understand your request and I'm thinking to submit the patches
>   1) Add new mbox API between ixgbe/ixgbevf to turn MC promiscuous on,
>      and enables it when ixgbevf needs over 30 MC addresses.
>   2) Add a policy knob to prevent enabling it from the PF.
> 
> Does it seem okay?

This sounds totally fine to me.  That way users that need this functionality can get it while anyone concerned about side effects don't enable it.

> 
> BTW, I'm bit worried about to use ndo interface for 2) because adding a new
> hook makes core code complicated.
> Is it really reasonable to do it with ndo?
> I haven't find any other suitable method to do it, right now. And using ndo VF
> hook looks standard way to control VF functionality.

To be honest I had been thinking it would be enabled by PF.  Basically a hook saying the PF was willing to let any VF's go into MC promiscuous and thus so not bothering to brake it out by VF.  Since the advice effects I was worried about would be felt on all the VF's and PF even if it was enabled on only some VF's.   I imagine there would be some benefit to being able to control how many VF's were able to enter this mode but I would be ok with either.

> Then, I think it's the best way to implement this policy in ndo hook.

That seems reasonable to me.  If not I'm not sure what else would be any better, maybe ethtool --set-priv-flags but that wouldn't make much sense if you set it by VF.  Likewise I'm not sure how excited people would be about including policy like this in ethtool.

> 
> >
> > In some ways it is almost the mirror image of the issue you brought up:
> >
> > Adding a new hook for this seems over-complicated to me.  And it still
> > doesn't solve the real problems that
> >  a) the user has to know about this limit, and
> >  b) manually configure the feature
> >
> > My reverse argument might be that if this happens automatically.  It
> > might take the VM provider a long time to realize performance has
> > taken a hit because some VM asked to join 31 multicast groups and
> entered MC promiscuous.  Then only to find that they have no way to block
> such behavior.
> >
> > Maybe I wouldn't as concerned if the patch author could provide some
> > performance results to show this won't have as a negative effect as I'm
> afraid it might?
> 
> Hm, what kind of test case would you like to have?
> The user A who has 30 MC addresses vs the user B who has 31 MC addresses,
> which means that MC promiscuous mode, and coming MC packets the user
> doesn't expect?
> 
> thanks,
> Hiroshi

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [E1000-devel] [PATCH 1/2] if_link: Add VF multicast promiscuous mode control
  2015-01-23  0:32                             ` Hiroshi Shimamoto
@ 2015-01-23  7:18                               ` Alexander Duyck
  -1 siblings, 0 replies; 46+ messages in thread
From: Alexander Duyck @ 2015-01-23  7:18 UTC (permalink / raw)
  To: Hiroshi Shimamoto, Skidmore, Donald C, Bjørn Mork
  Cc: e1000-devel, netdev, Choi, Sy Jong, linux-kernel, David Laight,
	Hayato Momma

On 01/22/2015 04:32 PM, Hiroshi Shimamoto wrote:
>> Subject: RE: [E1000-devel] [PATCH 1/2] if_link: Add VF multicast promiscuous mode control
>>> "Skidmore, Donald C" <donald.c.skidmore@intel.com> writes:
>>>
>>>> My hang up is more related to: without the nob to enable it (off by
>>>> default) we are letting one VF dictate policy for all the other VFs
>>>> and the PF.  If one VF needs to be in promiscuous multicast so is
>>>> everyone else.  Their stacks now needs to deal with all the extra
>>>> multicast packets.  As you point out this might not be a direct
>>>> concern for isolation in that the VM could have 'chosen' to join any
>>>> Multicast group and seen this traffic.  My concern over isolation is
>>>> one VF has chosen that all the other VM now have to see this multicast
>>>> traffic.
>>> Apologies if this question is stupid, but I just have to ask about stuff I don't
>>> understand...
>>>
>>> Looking at the proposed implementation, the promiscous multicast flag
>>> seems to be a per-VF flag:
>>>
>>> +int ixgbe_ndo_set_vf_mc_promisc(struct net_device *netdev, int vf, bool
>>> +setting) {
>>> +	struct ixgbe_adapter *adapter = netdev_priv(netdev);
>>> +	struct ixgbe_hw *hw = &adapter->hw;
>>> +	u32 vmolr;
>>> +
>>> +	if (vf >= adapter->num_vfs)
>>> +		return -EINVAL;
>>> +
>>> +	adapter->vfinfo[vf].mc_promisc_enabled = setting;
>>> +
>>> +	vmolr = IXGBE_READ_REG(hw, IXGBE_VMOLR(vf));
>>> +	if (setting) {
>>> +		e_info(drv, "VF %u: enabling multicast promiscuous\n", vf);
>>> +		vmolr |= IXGBE_VMOLR_MPE;
>>> +	} else {
>>> +		e_info(drv, "VF %u: disabling multicast promiscuous\n", vf);
>>> +		vmolr &= ~IXGBE_VMOLR_MPE;
>>> +	}
>>> +
>>> +	IXGBE_WRITE_REG(hw, IXGBE_VMOLR(vf), vmolr);
>>> +
>>> +	return 0;
>>> +}
>>> +
>>>
>>> I haven't read the data sheet, but I took a quick look at the excellent high
>>> level driver docs:
>>> http://www.intel.com/content/dam/doc/design-guide/82599-sr-iov-driver-
>>> companion-guide.pdf
>>>
>>> It mentions "Multicast Promiscuous Enable" in its "Thoughts for
>>> Customization" section:
>>>
>>>  7.1 Multicast Promiscuous Enable
>>>
>>>  The controller has provisions to allow each VF to be put into Multicast
>>> Promiscuous mode.  The Intel reference driver does not configure this
>>> option .
>>>
>>>  The capability can be enabled/disabled by manipulating the MPE field  (bit
>>> 28) of the PF VF L2 Control Register (PFVML2FLT – 0x0F000)
>>>
>>> and showing a section from the data sheet describing the "PF VM L2 Control
>>> Register - PFVML2FLT[n]  (0x0F000 + 4 * n, n=0...63; RW)"
>>>
>>> To me it looks like enabling Promiscuos Multicast for a VF won't affect any
>>> other VF at all.  Is this really not the case?
>>>
>>>
>>>
>>> Bjørn
>> Clearly not a dumb question at all and I'm glad you mentioned that. :)  I was going off the assumption, been awhile since
>> I read the patch, that the patch was using FCTRL.MPE or MANC.MCST_PASS_L2 which would turn multicast promiscuous on for
>> everyone.  Since the patch is using PFVML2FLT.MPE this lessens my concern over effect on the entire system.
> I believe the patches for this VF multicast promiscuous mode is per VF.
>
>> That said I still would prefer having a way to override this behavior on the PF, although I admit my argument is weaker.
>> I'm still concerned about a VF changing the behavior of the PF without any way to prevent it.  This might be one part
>> philosophical (PF sets policy not the VF) but this still could have a noticeable effect on the overall system.  If any
>> other VFs (or the PF) are receiving MC packets these will have to be replicated which will be a performance hit.  When
>> we use the MC hash this is limited vs. when anyone is in MC promiscuous every MC packet used by another pool would be
>> replicated.  I could imagine in some environments (i.e. public clouds) where you don't trust what is running in your VM
>> you might what to block this from happening.
> I understand your request and I'm thinking to submit the patches
>   1) Add new mbox API between ixgbe/ixgbevf to turn MC promiscuous on,
>      and enables it when ixgbevf needs over 30 MC addresses.
>   2) Add a policy knob to prevent enabling it from the PF.
>
> Does it seem okay?

I would advise that if such a knob is added it should be set to disabled
by default.  The main problem with supporting that many multicast
addresses per VF is that you run the risk for flooding the PCIe
interface on the network adapter if too many adapters were to go into
such a mode.

> BTW, I'm bit worried about to use ndo interface for 2) because adding a
> new hook makes core code complicated.
> Is it really reasonable to do it with ndo?
> I haven't find any other suitable method to do it, right now. And using
> ndo VF hook looks standard way to control VF functionality.
> Then, I think it's the best way to implement this policy in ndo hook.

The ndo is probably the only way to go on this.  It is how past controls
for the VF network functionality have been implemented.

>> In some ways it is almost the mirror image of the issue you brought up:
>>
>> Adding a new hook for this seems over-complicated to me.  And it still
>> doesn't solve the real problems that
>>  a) the user has to know about this limit, and
>>  b) manually configure the feature
>>
>> My reverse argument might be that if this happens automatically.  It might take the VM provider a long time to realize
>> performance has taken a hit because some VM asked to join 31 multicast groups and entered MC promiscuous.  Then only to
>> find that they have no way to block such behavior.
>>
>> Maybe I wouldn't as concerned if the patch author could provide some performance results to show this won't have as a
>> negative effect as I'm afraid it might?
> Hm, what kind of test case would you like to have?
> The user A who has 30 MC addresses vs the user B who has 31 MC addresses, which
> means that MC promiscuous mode, and coming MC packets the user doesn't expect?
>
> thanks,
> Hiroshi

The question I would have is how many of these interfaces do you expect
to have supporting the expanded multicast mode?  As it currently stands
multicast traffic has the potential to flood the adapter, greatly reduce
the overall throughput, and add extra workload to the PF and all VFs. 
For example if several VFs enable this feature, and then someone on the
network sends a stream of multicast traffic what happens to the CPU load
for the host system?

Also how many addresses beyond 30 is it you require?  An alternative to
adding multicast promiscuous might be to consider extending the mailbox
API to support sending more than 30 addresses via something such as a
multi-part multicast configuration message.  The fm10k driver already
has logic similar to this as it adds addresses 1 at a time though a
separate Switch interface API between the PF and the Switch.  You might
be able to reuse some of that code to reach beyond the 30 address limit.

- Alex

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [E1000-devel] [PATCH 1/2] if_link: Add VF multicast promiscuous mode control
@ 2015-01-23  7:18                               ` Alexander Duyck
  0 siblings, 0 replies; 46+ messages in thread
From: Alexander Duyck @ 2015-01-23  7:18 UTC (permalink / raw)
  To: Hiroshi Shimamoto, Skidmore, Donald C, Bjørn Mork
  Cc: e1000-devel, netdev, Choi, Sy Jong, linux-kernel, David Laight,
	Hayato Momma

On 01/22/2015 04:32 PM, Hiroshi Shimamoto wrote:
>> Subject: RE: [E1000-devel] [PATCH 1/2] if_link: Add VF multicast promiscuous mode control
>>> "Skidmore, Donald C" <donald.c.skidmore@intel.com> writes:
>>>
>>>> My hang up is more related to: without the nob to enable it (off by
>>>> default) we are letting one VF dictate policy for all the other VFs
>>>> and the PF.  If one VF needs to be in promiscuous multicast so is
>>>> everyone else.  Their stacks now needs to deal with all the extra
>>>> multicast packets.  As you point out this might not be a direct
>>>> concern for isolation in that the VM could have 'chosen' to join any
>>>> Multicast group and seen this traffic.  My concern over isolation is
>>>> one VF has chosen that all the other VM now have to see this multicast
>>>> traffic.
>>> Apologies if this question is stupid, but I just have to ask about stuff I don't
>>> understand...
>>>
>>> Looking at the proposed implementation, the promiscous multicast flag
>>> seems to be a per-VF flag:
>>>
>>> +int ixgbe_ndo_set_vf_mc_promisc(struct net_device *netdev, int vf, bool
>>> +setting) {
>>> +	struct ixgbe_adapter *adapter = netdev_priv(netdev);
>>> +	struct ixgbe_hw *hw = &adapter->hw;
>>> +	u32 vmolr;
>>> +
>>> +	if (vf >= adapter->num_vfs)
>>> +		return -EINVAL;
>>> +
>>> +	adapter->vfinfo[vf].mc_promisc_enabled = setting;
>>> +
>>> +	vmolr = IXGBE_READ_REG(hw, IXGBE_VMOLR(vf));
>>> +	if (setting) {
>>> +		e_info(drv, "VF %u: enabling multicast promiscuous\n", vf);
>>> +		vmolr |= IXGBE_VMOLR_MPE;
>>> +	} else {
>>> +		e_info(drv, "VF %u: disabling multicast promiscuous\n", vf);
>>> +		vmolr &= ~IXGBE_VMOLR_MPE;
>>> +	}
>>> +
>>> +	IXGBE_WRITE_REG(hw, IXGBE_VMOLR(vf), vmolr);
>>> +
>>> +	return 0;
>>> +}
>>> +
>>>
>>> I haven't read the data sheet, but I took a quick look at the excellent high
>>> level driver docs:
>>> http://www.intel.com/content/dam/doc/design-guide/82599-sr-iov-driver-
>>> companion-guide.pdf
>>>
>>> It mentions "Multicast Promiscuous Enable" in its "Thoughts for
>>> Customization" section:
>>>
>>>  7.1 Multicast Promiscuous Enable
>>>
>>>  The controller has provisions to allow each VF to be put into Multicast
>>> Promiscuous mode.  The Intel reference driver does not configure this
>>> option .
>>>
>>>  The capability can be enabled/disabled by manipulating the MPE field  (bit
>>> 28) of the PF VF L2 Control Register (PFVML2FLT – 0x0F000)
>>>
>>> and showing a section from the data sheet describing the "PF VM L2 Control
>>> Register - PFVML2FLT[n]  (0x0F000 + 4 * n, n=0...63; RW)"
>>>
>>> To me it looks like enabling Promiscuos Multicast for a VF won't affect any
>>> other VF at all.  Is this really not the case?
>>>
>>>
>>>
>>> Bjørn
>> Clearly not a dumb question at all and I'm glad you mentioned that. :)  I was going off the assumption, been awhile since
>> I read the patch, that the patch was using FCTRL.MPE or MANC.MCST_PASS_L2 which would turn multicast promiscuous on for
>> everyone.  Since the patch is using PFVML2FLT.MPE this lessens my concern over effect on the entire system.
> I believe the patches for this VF multicast promiscuous mode is per VF.
>
>> That said I still would prefer having a way to override this behavior on the PF, although I admit my argument is weaker.
>> I'm still concerned about a VF changing the behavior of the PF without any way to prevent it.  This might be one part
>> philosophical (PF sets policy not the VF) but this still could have a noticeable effect on the overall system.  If any
>> other VFs (or the PF) are receiving MC packets these will have to be replicated which will be a performance hit.  When
>> we use the MC hash this is limited vs. when anyone is in MC promiscuous every MC packet used by another pool would be
>> replicated.  I could imagine in some environments (i.e. public clouds) where you don't trust what is running in your VM
>> you might what to block this from happening.
> I understand your request and I'm thinking to submit the patches
>   1) Add new mbox API between ixgbe/ixgbevf to turn MC promiscuous on,
>      and enables it when ixgbevf needs over 30 MC addresses.
>   2) Add a policy knob to prevent enabling it from the PF.
>
> Does it seem okay?

I would advise that if such a knob is added it should be set to disabled
by default.  The main problem with supporting that many multicast
addresses per VF is that you run the risk for flooding the PCIe
interface on the network adapter if too many adapters were to go into
such a mode.

> BTW, I'm bit worried about to use ndo interface for 2) because adding a
> new hook makes core code complicated.
> Is it really reasonable to do it with ndo?
> I haven't find any other suitable method to do it, right now. And using
> ndo VF hook looks standard way to control VF functionality.
> Then, I think it's the best way to implement this policy in ndo hook.

The ndo is probably the only way to go on this.  It is how past controls
for the VF network functionality have been implemented.

>> In some ways it is almost the mirror image of the issue you brought up:
>>
>> Adding a new hook for this seems over-complicated to me.  And it still
>> doesn't solve the real problems that
>>  a) the user has to know about this limit, and
>>  b) manually configure the feature
>>
>> My reverse argument might be that if this happens automatically.  It might take the VM provider a long time to realize
>> performance has taken a hit because some VM asked to join 31 multicast groups and entered MC promiscuous.  Then only to
>> find that they have no way to block such behavior.
>>
>> Maybe I wouldn't as concerned if the patch author could provide some performance results to show this won't have as a
>> negative effect as I'm afraid it might?
> Hm, what kind of test case would you like to have?
> The user A who has 30 MC addresses vs the user B who has 31 MC addresses, which
> means that MC promiscuous mode, and coming MC packets the user doesn't expect?
>
> thanks,
> Hiroshi

The question I would have is how many of these interfaces do you expect
to have supporting the expanded multicast mode?  As it currently stands
multicast traffic has the potential to flood the adapter, greatly reduce
the overall throughput, and add extra workload to the PF and all VFs. 
For example if several VFs enable this feature, and then someone on the
network sends a stream of multicast traffic what happens to the CPU load
for the host system?

Also how many addresses beyond 30 is it you require?  An alternative to
adding multicast promiscuous might be to consider extending the mailbox
API to support sending more than 30 addresses via something such as a
multi-part multicast configuration message.  The fm10k driver already
has logic similar to this as it adds addresses 1 at a time though a
separate Switch interface API between the PF and the Switch.  You might
be able to reuse some of that code to reach beyond the 30 address limit.

- Alex

^ permalink raw reply	[flat|nested] 46+ messages in thread

* RE: [E1000-devel] [PATCH 1/2] if_link: Add VF multicast promiscuous mode control
  2015-01-23  7:18                               ` Alexander Duyck
@ 2015-01-23 18:24                                 ` Skidmore, Donald C
  -1 siblings, 0 replies; 46+ messages in thread
From: Skidmore, Donald C @ 2015-01-23 18:24 UTC (permalink / raw)
  To: Alexander Duyck, Hiroshi Shimamoto, Bjørn Mork
  Cc: e1000-devel, netdev, Choi, Sy Jong, linux-kernel, David Laight,
	Hayato Momma

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: Type: text/plain; charset="utf-8", Size: 8728 bytes --]



> -----Original Message-----
> From: Alexander Duyck [mailto:alexander.duyck@gmail.com]
> Sent: Thursday, January 22, 2015 11:18 PM
> To: Hiroshi Shimamoto; Skidmore, Donald C; Bjørn Mork
> Cc: e1000-devel@lists.sourceforge.net; netdev@vger.kernel.org; Choi, Sy
> Jong; linux-kernel@vger.kernel.org; David Laight; Hayato Momma
> Subject: Re: [E1000-devel] [PATCH 1/2] if_link: Add VF multicast promiscuous
> mode control
> 
> On 01/22/2015 04:32 PM, Hiroshi Shimamoto wrote:
> >> Subject: RE: [E1000-devel] [PATCH 1/2] if_link: Add VF multicast
> >> promiscuous mode control
> >>> "Skidmore, Donald C" <donald.c.skidmore@intel.com> writes:
> >>>
> >>>> My hang up is more related to: without the nob to enable it (off by
> >>>> default) we are letting one VF dictate policy for all the other VFs
> >>>> and the PF.  If one VF needs to be in promiscuous multicast so is
> >>>> everyone else.  Their stacks now needs to deal with all the extra
> >>>> multicast packets.  As you point out this might not be a direct
> >>>> concern for isolation in that the VM could have 'chosen' to join
> >>>> any Multicast group and seen this traffic.  My concern over
> >>>> isolation is one VF has chosen that all the other VM now have to
> >>>> see this multicast traffic.
> >>> Apologies if this question is stupid, but I just have to ask about
> >>> stuff I don't understand...
> >>>
> >>> Looking at the proposed implementation, the promiscous multicast
> >>> flag seems to be a per-VF flag:
> >>>
> >>> +int ixgbe_ndo_set_vf_mc_promisc(struct net_device *netdev, int vf,
> >>> +bool
> >>> +setting) {
> >>> +	struct ixgbe_adapter *adapter = netdev_priv(netdev);
> >>> +	struct ixgbe_hw *hw = &adapter->hw;
> >>> +	u32 vmolr;
> >>> +
> >>> +	if (vf >= adapter->num_vfs)
> >>> +		return -EINVAL;
> >>> +
> >>> +	adapter->vfinfo[vf].mc_promisc_enabled = setting;
> >>> +
> >>> +	vmolr = IXGBE_READ_REG(hw, IXGBE_VMOLR(vf));
> >>> +	if (setting) {
> >>> +		e_info(drv, "VF %u: enabling multicast promiscuous\n", vf);
> >>> +		vmolr |= IXGBE_VMOLR_MPE;
> >>> +	} else {
> >>> +		e_info(drv, "VF %u: disabling multicast promiscuous\n", vf);
> >>> +		vmolr &= ~IXGBE_VMOLR_MPE;
> >>> +	}
> >>> +
> >>> +	IXGBE_WRITE_REG(hw, IXGBE_VMOLR(vf), vmolr);
> >>> +
> >>> +	return 0;
> >>> +}
> >>> +
> >>>
> >>> I haven't read the data sheet, but I took a quick look at the
> >>> excellent high level driver docs:
> >>> http://www.intel.com/content/dam/doc/design-guide/82599-sr-iov-
> drive
> >>> r-
> >>> companion-guide.pdf
> >>>
> >>> It mentions "Multicast Promiscuous Enable" in its "Thoughts for
> >>> Customization" section:
> >>>
> >>>  7.1 Multicast Promiscuous Enable
> >>>
> >>>  The controller has provisions to allow each VF to be put into
> >>> Multicast Promiscuous mode.  The Intel reference driver does not
> >>> configure this option .
> >>>
> >>>  The capability can be enabled/disabled by manipulating the MPE
> >>> field  (bit
> >>> 28) of the PF VF L2 Control Register (PFVML2FLT – 0x0F000)
> >>>
> >>> and showing a section from the data sheet describing the "PF VM L2
> >>> Control Register - PFVML2FLT[n]  (0x0F000 + 4 * n, n=0...63; RW)"
> >>>
> >>> To me it looks like enabling Promiscuos Multicast for a VF won't
> >>> affect any other VF at all.  Is this really not the case?
> >>>
> >>>
> >>>
> >>> Bjørn
> >> Clearly not a dumb question at all and I'm glad you mentioned that.
> >> :)  I was going off the assumption, been awhile since I read the
> >> patch, that the patch was using FCTRL.MPE or MANC.MCST_PASS_L2
> which would turn multicast promiscuous on for everyone.  Since the patch is
> using PFVML2FLT.MPE this lessens my concern over effect on the entire
> system.
> > I believe the patches for this VF multicast promiscuous mode is per VF.
> >
> >> That said I still would prefer having a way to override this behavior on the
> PF, although I admit my argument is weaker.
> >> I'm still concerned about a VF changing the behavior of the PF
> >> without any way to prevent it.  This might be one part philosophical
> >> (PF sets policy not the VF) but this still could have a noticeable
> >> effect on the overall system.  If any other VFs (or the PF) are
> >> receiving MC packets these will have to be replicated which will be a
> >> performance hit.  When we use the MC hash this is limited vs. when
> anyone is in MC promiscuous every MC packet used by another pool would
> be replicated.  I could imagine in some environments (i.e. public clouds)
> where you don't trust what is running in your VM you might what to block
> this from happening.
> > I understand your request and I'm thinking to submit the patches
> >   1) Add new mbox API between ixgbe/ixgbevf to turn MC promiscuous on,
> >      and enables it when ixgbevf needs over 30 MC addresses.
> >   2) Add a policy knob to prevent enabling it from the PF.
> >
> > Does it seem okay?
> 
> I would advise that if such a knob is added it should be set to disabled by
> default.  The main problem with supporting that many multicast addresses
> per VF is that you run the risk for flooding the PCIe interface on the network
> adapter if too many adapters were to go into such a mode.
> 

This was my understanding as well.  Someone would have to "choose" to allow VM to enter this mode, meaning off by default.

> > BTW, I'm bit worried about to use ndo interface for 2) because adding
> > a new hook makes core code complicated.
> > Is it really reasonable to do it with ndo?
> > I haven't find any other suitable method to do it, right now. And
> > using ndo VF hook looks standard way to control VF functionality.
> > Then, I think it's the best way to implement this policy in ndo hook.
> 
> The ndo is probably the only way to go on this.  It is how past controls for the
> VF network functionality have been implemented.
> 
> >> In some ways it is almost the mirror image of the issue you brought up:
> >>
> >> Adding a new hook for this seems over-complicated to me.  And it
> >> still doesn't solve the real problems that
> >>  a) the user has to know about this limit, and
> >>  b) manually configure the feature
> >>
> >> My reverse argument might be that if this happens automatically.  It
> >> might take the VM provider a long time to realize performance has
> >> taken a hit because some VM asked to join 31 multicast groups and
> entered MC promiscuous.  Then only to find that they have no way to block
> such behavior.
> >>
> >> Maybe I wouldn't as concerned if the patch author could provide some
> >> performance results to show this won't have as a negative effect as I'm
> afraid it might?
> > Hm, what kind of test case would you like to have?
> > The user A who has 30 MC addresses vs the user B who has 31 MC
> > addresses, which means that MC promiscuous mode, and coming MC
> packets the user doesn't expect?
> >
> > thanks,
> > Hiroshi
> 
> The question I would have is how many of these interfaces do you expect to
> have supporting the expanded multicast mode?  As it currently stands
> multicast traffic has the potential to flood the adapter, greatly reduce the
> overall throughput, and add extra workload to the PF and all VFs.
> For example if several VFs enable this feature, and then someone on the
> network sends a stream of multicast traffic what happens to the CPU load for
> the host system?
> 
> Also how many addresses beyond 30 is it you require?  An alternative to
> adding multicast promiscuous might be to consider extending the mailbox
> API to support sending more than 30 addresses via something such as a
> multi-part multicast configuration message.  The fm10k driver already has
> logic similar to this as it adds addresses 1 at a time though a separate Switch
> interface API between the PF and the Switch.  You might be able to reuse
> some of that code to reach beyond the 30 address limit.

I think this would be a much safer approach and probably scale well for small sets.  However I don't think it would work for Hiroshi's use case.  If I remember correctly he wanted 1000's of MC groups per VM.   I imagine there would be considerable overhead even loading up our 4K hash table 1 address at a time, likewise with that many address wouldn't this just be pretty much the same as being in multicast promiscuous.  Still this might be an interesting approach to support those needing only a few MC groups over the limit.

Alex's point is worth reiterating, this will effect performance.  Have you tested this out under load and still see the results you can live with?

-Don Skidmore <donald.c.skidmore@intel.com>

> 
> - Alex
ÿôèº{.nÇ+‰·Ÿ®‰­†+%ŠËÿ±éݶ\x17¥Šwÿº{.nÇ+‰·¥Š{±þG«éÿŠ{ayº\x1dʇڙë,j\a­¢f£¢·hšïêÿ‘êçz_è®\x03(­éšŽŠÝ¢j"ú\x1a¶^[m§ÿÿ¾\a«þG«éÿ¢¸?™¨è­Ú&£ø§~á¶iO•æ¬z·švØ^\x14\x04\x1a¶^[m§ÿÿÃ\fÿ¶ìÿ¢¸?–I¥

^ permalink raw reply	[flat|nested] 46+ messages in thread

* RE: [E1000-devel] [PATCH 1/2] if_link: Add VF multicast promiscuous mode control
@ 2015-01-23 18:24                                 ` Skidmore, Donald C
  0 siblings, 0 replies; 46+ messages in thread
From: Skidmore, Donald C @ 2015-01-23 18:24 UTC (permalink / raw)
  To: Alexander Duyck, Hiroshi Shimamoto, Bjørn Mork
  Cc: e1000-devel, netdev, Choi, Sy Jong, linux-kernel, David Laight,
	Hayato Momma



> -----Original Message-----
> From: Alexander Duyck [mailto:alexander.duyck@gmail.com]
> Sent: Thursday, January 22, 2015 11:18 PM
> To: Hiroshi Shimamoto; Skidmore, Donald C; Bjørn Mork
> Cc: e1000-devel@lists.sourceforge.net; netdev@vger.kernel.org; Choi, Sy
> Jong; linux-kernel@vger.kernel.org; David Laight; Hayato Momma
> Subject: Re: [E1000-devel] [PATCH 1/2] if_link: Add VF multicast promiscuous
> mode control
> 
> On 01/22/2015 04:32 PM, Hiroshi Shimamoto wrote:
> >> Subject: RE: [E1000-devel] [PATCH 1/2] if_link: Add VF multicast
> >> promiscuous mode control
> >>> "Skidmore, Donald C" <donald.c.skidmore@intel.com> writes:
> >>>
> >>>> My hang up is more related to: without the nob to enable it (off by
> >>>> default) we are letting one VF dictate policy for all the other VFs
> >>>> and the PF.  If one VF needs to be in promiscuous multicast so is
> >>>> everyone else.  Their stacks now needs to deal with all the extra
> >>>> multicast packets.  As you point out this might not be a direct
> >>>> concern for isolation in that the VM could have 'chosen' to join
> >>>> any Multicast group and seen this traffic.  My concern over
> >>>> isolation is one VF has chosen that all the other VM now have to
> >>>> see this multicast traffic.
> >>> Apologies if this question is stupid, but I just have to ask about
> >>> stuff I don't understand...
> >>>
> >>> Looking at the proposed implementation, the promiscous multicast
> >>> flag seems to be a per-VF flag:
> >>>
> >>> +int ixgbe_ndo_set_vf_mc_promisc(struct net_device *netdev, int vf,
> >>> +bool
> >>> +setting) {
> >>> +	struct ixgbe_adapter *adapter = netdev_priv(netdev);
> >>> +	struct ixgbe_hw *hw = &adapter->hw;
> >>> +	u32 vmolr;
> >>> +
> >>> +	if (vf >= adapter->num_vfs)
> >>> +		return -EINVAL;
> >>> +
> >>> +	adapter->vfinfo[vf].mc_promisc_enabled = setting;
> >>> +
> >>> +	vmolr = IXGBE_READ_REG(hw, IXGBE_VMOLR(vf));
> >>> +	if (setting) {
> >>> +		e_info(drv, "VF %u: enabling multicast promiscuous\n", vf);
> >>> +		vmolr |= IXGBE_VMOLR_MPE;
> >>> +	} else {
> >>> +		e_info(drv, "VF %u: disabling multicast promiscuous\n", vf);
> >>> +		vmolr &= ~IXGBE_VMOLR_MPE;
> >>> +	}
> >>> +
> >>> +	IXGBE_WRITE_REG(hw, IXGBE_VMOLR(vf), vmolr);
> >>> +
> >>> +	return 0;
> >>> +}
> >>> +
> >>>
> >>> I haven't read the data sheet, but I took a quick look at the
> >>> excellent high level driver docs:
> >>> http://www.intel.com/content/dam/doc/design-guide/82599-sr-iov-
> drive
> >>> r-
> >>> companion-guide.pdf
> >>>
> >>> It mentions "Multicast Promiscuous Enable" in its "Thoughts for
> >>> Customization" section:
> >>>
> >>>  7.1 Multicast Promiscuous Enable
> >>>
> >>>  The controller has provisions to allow each VF to be put into
> >>> Multicast Promiscuous mode.  The Intel reference driver does not
> >>> configure this option .
> >>>
> >>>  The capability can be enabled/disabled by manipulating the MPE
> >>> field  (bit
> >>> 28) of the PF VF L2 Control Register (PFVML2FLT – 0x0F000)
> >>>
> >>> and showing a section from the data sheet describing the "PF VM L2
> >>> Control Register - PFVML2FLT[n]  (0x0F000 + 4 * n, n=0...63; RW)"
> >>>
> >>> To me it looks like enabling Promiscuos Multicast for a VF won't
> >>> affect any other VF at all.  Is this really not the case?
> >>>
> >>>
> >>>
> >>> Bjørn
> >> Clearly not a dumb question at all and I'm glad you mentioned that.
> >> :)  I was going off the assumption, been awhile since I read the
> >> patch, that the patch was using FCTRL.MPE or MANC.MCST_PASS_L2
> which would turn multicast promiscuous on for everyone.  Since the patch is
> using PFVML2FLT.MPE this lessens my concern over effect on the entire
> system.
> > I believe the patches for this VF multicast promiscuous mode is per VF.
> >
> >> That said I still would prefer having a way to override this behavior on the
> PF, although I admit my argument is weaker.
> >> I'm still concerned about a VF changing the behavior of the PF
> >> without any way to prevent it.  This might be one part philosophical
> >> (PF sets policy not the VF) but this still could have a noticeable
> >> effect on the overall system.  If any other VFs (or the PF) are
> >> receiving MC packets these will have to be replicated which will be a
> >> performance hit.  When we use the MC hash this is limited vs. when
> anyone is in MC promiscuous every MC packet used by another pool would
> be replicated.  I could imagine in some environments (i.e. public clouds)
> where you don't trust what is running in your VM you might what to block
> this from happening.
> > I understand your request and I'm thinking to submit the patches
> >   1) Add new mbox API between ixgbe/ixgbevf to turn MC promiscuous on,
> >      and enables it when ixgbevf needs over 30 MC addresses.
> >   2) Add a policy knob to prevent enabling it from the PF.
> >
> > Does it seem okay?
> 
> I would advise that if such a knob is added it should be set to disabled by
> default.  The main problem with supporting that many multicast addresses
> per VF is that you run the risk for flooding the PCIe interface on the network
> adapter if too many adapters were to go into such a mode.
> 

This was my understanding as well.  Someone would have to "choose" to allow VM to enter this mode, meaning off by default.

> > BTW, I'm bit worried about to use ndo interface for 2) because adding
> > a new hook makes core code complicated.
> > Is it really reasonable to do it with ndo?
> > I haven't find any other suitable method to do it, right now. And
> > using ndo VF hook looks standard way to control VF functionality.
> > Then, I think it's the best way to implement this policy in ndo hook.
> 
> The ndo is probably the only way to go on this.  It is how past controls for the
> VF network functionality have been implemented.
> 
> >> In some ways it is almost the mirror image of the issue you brought up:
> >>
> >> Adding a new hook for this seems over-complicated to me.  And it
> >> still doesn't solve the real problems that
> >>  a) the user has to know about this limit, and
> >>  b) manually configure the feature
> >>
> >> My reverse argument might be that if this happens automatically.  It
> >> might take the VM provider a long time to realize performance has
> >> taken a hit because some VM asked to join 31 multicast groups and
> entered MC promiscuous.  Then only to find that they have no way to block
> such behavior.
> >>
> >> Maybe I wouldn't as concerned if the patch author could provide some
> >> performance results to show this won't have as a negative effect as I'm
> afraid it might?
> > Hm, what kind of test case would you like to have?
> > The user A who has 30 MC addresses vs the user B who has 31 MC
> > addresses, which means that MC promiscuous mode, and coming MC
> packets the user doesn't expect?
> >
> > thanks,
> > Hiroshi
> 
> The question I would have is how many of these interfaces do you expect to
> have supporting the expanded multicast mode?  As it currently stands
> multicast traffic has the potential to flood the adapter, greatly reduce the
> overall throughput, and add extra workload to the PF and all VFs.
> For example if several VFs enable this feature, and then someone on the
> network sends a stream of multicast traffic what happens to the CPU load for
> the host system?
> 
> Also how many addresses beyond 30 is it you require?  An alternative to
> adding multicast promiscuous might be to consider extending the mailbox
> API to support sending more than 30 addresses via something such as a
> multi-part multicast configuration message.  The fm10k driver already has
> logic similar to this as it adds addresses 1 at a time though a separate Switch
> interface API between the PF and the Switch.  You might be able to reuse
> some of that code to reach beyond the 30 address limit.

I think this would be a much safer approach and probably scale well for small sets.  However I don't think it would work for Hiroshi's use case.  If I remember correctly he wanted 1000's of MC groups per VM.   I imagine there would be considerable overhead even loading up our 4K hash table 1 address at a time, likewise with that many address wouldn't this just be pretty much the same as being in multicast promiscuous.  Still this might be an interesting approach to support those needing only a few MC groups over the limit.

Alex's point is worth reiterating, this will effect performance.  Have you tested this out under load and still see the results you can live with?

-Don Skidmore <donald.c.skidmore@intel.com>

> 
> - Alex

^ permalink raw reply	[flat|nested] 46+ messages in thread

* RE: [E1000-devel] [PATCH 1/2] if_link: Add VF multicast promiscuous mode control
  2015-01-23 18:24                                 ` Skidmore, Donald C
@ 2015-01-27 12:53                                   ` Hiroshi Shimamoto
  -1 siblings, 0 replies; 46+ messages in thread
From: Hiroshi Shimamoto @ 2015-01-27 12:53 UTC (permalink / raw)
  To: Skidmore, Donald C, Alexander Duyck, Bjørn Mork
  Cc: e1000-devel, netdev, Choi, Sy Jong, linux-kernel, David Laight,
	Hayato Momma

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: Type: text/plain; charset="utf-8", Size: 9528 bytes --]

> > On 01/22/2015 04:32 PM, Hiroshi Shimamoto wrote:
> > >> Subject: RE: [E1000-devel] [PATCH 1/2] if_link: Add VF multicast
> > >> promiscuous mode control
> > >>> "Skidmore, Donald C" <donald.c.skidmore@intel.com> writes:
> > >>>
> > >>>> My hang up is more related to: without the nob to enable it (off by
> > >>>> default) we are letting one VF dictate policy for all the other VFs
> > >>>> and the PF.  If one VF needs to be in promiscuous multicast so is
> > >>>> everyone else.  Their stacks now needs to deal with all the extra
> > >>>> multicast packets.  As you point out this might not be a direct
> > >>>> concern for isolation in that the VM could have 'chosen' to join
> > >>>> any Multicast group and seen this traffic.  My concern over
> > >>>> isolation is one VF has chosen that all the other VM now have to
> > >>>> see this multicast traffic.
> > >>> Apologies if this question is stupid, but I just have to ask about
> > >>> stuff I don't understand...
> > >>>
> > >>> Looking at the proposed implementation, the promiscous multicast
> > >>> flag seems to be a per-VF flag:
> > >>>
> > >>> +int ixgbe_ndo_set_vf_mc_promisc(struct net_device *netdev, int vf,
> > >>> +bool
> > >>> +setting) {
> > >>> +	struct ixgbe_adapter *adapter = netdev_priv(netdev);
> > >>> +	struct ixgbe_hw *hw = &adapter->hw;
> > >>> +	u32 vmolr;
> > >>> +
> > >>> +	if (vf >= adapter->num_vfs)
> > >>> +		return -EINVAL;
> > >>> +
> > >>> +	adapter->vfinfo[vf].mc_promisc_enabled = setting;
> > >>> +
> > >>> +	vmolr = IXGBE_READ_REG(hw, IXGBE_VMOLR(vf));
> > >>> +	if (setting) {
> > >>> +		e_info(drv, "VF %u: enabling multicast promiscuous\n", vf);
> > >>> +		vmolr |= IXGBE_VMOLR_MPE;
> > >>> +	} else {
> > >>> +		e_info(drv, "VF %u: disabling multicast promiscuous\n", vf);
> > >>> +		vmolr &= ~IXGBE_VMOLR_MPE;
> > >>> +	}
> > >>> +
> > >>> +	IXGBE_WRITE_REG(hw, IXGBE_VMOLR(vf), vmolr);
> > >>> +
> > >>> +	return 0;
> > >>> +}
> > >>> +
> > >>>
> > >>> I haven't read the data sheet, but I took a quick look at the
> > >>> excellent high level driver docs:
> > >>> http://www.intel.com/content/dam/doc/design-guide/82599-sr-iov-
> > drive
> > >>> r-
> > >>> companion-guide.pdf
> > >>>
> > >>> It mentions "Multicast Promiscuous Enable" in its "Thoughts for
> > >>> Customization" section:
> > >>>
> > >>>  7.1 Multicast Promiscuous Enable
> > >>>
> > >>>  The controller has provisions to allow each VF to be put into
> > >>> Multicast Promiscuous mode.  The Intel reference driver does not
> > >>> configure this option .
> > >>>
> > >>>  The capability can be enabled/disabled by manipulating the MPE
> > >>> field  (bit
> > >>> 28) of the PF VF L2 Control Register (PFVML2FLT – 0x0F000)
> > >>>
> > >>> and showing a section from the data sheet describing the "PF VM L2
> > >>> Control Register - PFVML2FLT[n]  (0x0F000 + 4 * n, n=0...63; RW)"
> > >>>
> > >>> To me it looks like enabling Promiscuos Multicast for a VF won't
> > >>> affect any other VF at all.  Is this really not the case?
> > >>>
> > >>>
> > >>>
> > >>> Bjørn
> > >> Clearly not a dumb question at all and I'm glad you mentioned that.
> > >> :)  I was going off the assumption, been awhile since I read the
> > >> patch, that the patch was using FCTRL.MPE or MANC.MCST_PASS_L2
> > which would turn multicast promiscuous on for everyone.  Since the patch is
> > using PFVML2FLT.MPE this lessens my concern over effect on the entire
> > system.
> > > I believe the patches for this VF multicast promiscuous mode is per VF.
> > >
> > >> That said I still would prefer having a way to override this behavior on the
> > PF, although I admit my argument is weaker.
> > >> I'm still concerned about a VF changing the behavior of the PF
> > >> without any way to prevent it.  This might be one part philosophical
> > >> (PF sets policy not the VF) but this still could have a noticeable
> > >> effect on the overall system.  If any other VFs (or the PF) are
> > >> receiving MC packets these will have to be replicated which will be a
> > >> performance hit.  When we use the MC hash this is limited vs. when
> > anyone is in MC promiscuous every MC packet used by another pool would
> > be replicated.  I could imagine in some environments (i.e. public clouds)
> > where you don't trust what is running in your VM you might what to block
> > this from happening.
> > > I understand your request and I'm thinking to submit the patches
> > >   1) Add new mbox API between ixgbe/ixgbevf to turn MC promiscuous on,
> > >      and enables it when ixgbevf needs over 30 MC addresses.
> > >   2) Add a policy knob to prevent enabling it from the PF.
> > >
> > > Does it seem okay?
> >
> > I would advise that if such a knob is added it should be set to disabled by
> > default.  The main problem with supporting that many multicast addresses
> > per VF is that you run the risk for flooding the PCIe interface on the network
> > adapter if too many adapters were to go into such a mode.
> >
> 
> This was my understanding as well.  Someone would have to "choose" to allow VM to enter this mode, meaning off by default.

I see you'd like to keep it off unless the administrator wants to configure.

> 
> > > BTW, I'm bit worried about to use ndo interface for 2) because adding
> > > a new hook makes core code complicated.
> > > Is it really reasonable to do it with ndo?
> > > I haven't find any other suitable method to do it, right now. And
> > > using ndo VF hook looks standard way to control VF functionality.
> > > Then, I think it's the best way to implement this policy in ndo hook.
> >
> > The ndo is probably the only way to go on this.  It is how past controls for the
> > VF network functionality have been implemented.
> >
> > >> In some ways it is almost the mirror image of the issue you brought up:
> > >>
> > >> Adding a new hook for this seems over-complicated to me.  And it
> > >> still doesn't solve the real problems that
> > >>  a) the user has to know about this limit, and
> > >>  b) manually configure the feature
> > >>
> > >> My reverse argument might be that if this happens automatically.  It
> > >> might take the VM provider a long time to realize performance has
> > >> taken a hit because some VM asked to join 31 multicast groups and
> > entered MC promiscuous.  Then only to find that they have no way to block
> > such behavior.
> > >>
> > >> Maybe I wouldn't as concerned if the patch author could provide some
> > >> performance results to show this won't have as a negative effect as I'm
> > afraid it might?
> > > Hm, what kind of test case would you like to have?
> > > The user A who has 30 MC addresses vs the user B who has 31 MC
> > > addresses, which means that MC promiscuous mode, and coming MC
> > packets the user doesn't expect?
> > >
> > > thanks,
> > > Hiroshi
> >
> > The question I would have is how many of these interfaces do you expect to
> > have supporting the expanded multicast mode?  As it currently stands

Right now, we're thinking 1 or 2 VFs per PF will be enabled this feature
in our use case. Another representation, 1 or 2 VMs will be used VF device on
one server, each VM has 2 VFs usually.

> > multicast traffic has the potential to flood the adapter, greatly reduce the
> > overall throughput, and add extra workload to the PF and all VFs.
> > For example if several VFs enable this feature, and then someone on the
> > network sends a stream of multicast traffic what happens to the CPU load for
> > the host system?

I'm not sure why all VFs are affected.
I can imagine that PF and VFs which are enabled this feature could receive
flooding packets, but other VFs never receive such packets.

> >
> > Also how many addresses beyond 30 is it you require?  An alternative to
> > adding multicast promiscuous might be to consider extending the mailbox
> > API to support sending more than 30 addresses via something such as a
> > multi-part multicast configuration message.  The fm10k driver already has
> > logic similar to this as it adds addresses 1 at a time though a separate Switch
> > interface API between the PF and the Switch.  You might be able to reuse
> > some of that code to reach beyond the 30 address limit.
> 
> I think this would be a much safer approach and probably scale well for small sets.  However I don't think it would work
> for Hiroshi's use case.  If I remember correctly he wanted 1000's of MC groups per VM.   I imagine there would be considerable
> overhead even loading up our 4K hash table 1 address at a time, likewise with that many address wouldn't this just be
> pretty much the same as being in multicast promiscuous.  Still this might be an interesting approach to support those
> needing only a few MC groups over the limit.

We would like to use 2000 IPs usually, and would extend to 4k which comes from VLAN limit.
IIRC, mbox API has only 32 words, it doesn't scale, I think.
And there is a single hash table per PF, not per VF. I guess if we set 4k hash entries
from a specific VF, every VF will receive all MC packets.

> 
> Alex's point is worth reiterating, this will effect performance.  Have you tested this out under load and still see the
> results you can live with?

I don't have any number in MC load.
In general case, we have enough performance in VM, almost wire rate in our scenario.

thanks,
Hiroshi

> 
> -Don Skidmore <donald.c.skidmore@intel.com>
> 
> >
> > - Alex
ÿôèº{.nÇ+‰·Ÿ®‰­†+%ŠËÿ±éݶ\x17¥Šwÿº{.nÇ+‰·¥Š{±þG«éÿŠ{ayº\x1dʇڙë,j\a­¢f£¢·hšïêÿ‘êçz_è®\x03(­éšŽŠÝ¢j"ú\x1a¶^[m§ÿÿ¾\a«þG«éÿ¢¸?™¨è­Ú&£ø§~á¶iO•æ¬z·švØ^\x14\x04\x1a¶^[m§ÿÿÃ\fÿ¶ìÿ¢¸?–I¥

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH 1/2] if_link: Add VF multicast promiscuous mode control
@ 2015-01-27 12:53                                   ` Hiroshi Shimamoto
  0 siblings, 0 replies; 46+ messages in thread
From: Hiroshi Shimamoto @ 2015-01-27 12:53 UTC (permalink / raw)
  To: Skidmore, Donald C, Alexander Duyck, Bjørn Mork
  Cc: e1000-devel, netdev, Choi, Sy Jong, linux-kernel, David Laight,
	Hayato Momma

> > On 01/22/2015 04:32 PM, Hiroshi Shimamoto wrote:
> > >> Subject: RE: [E1000-devel] [PATCH 1/2] if_link: Add VF multicast
> > >> promiscuous mode control
> > >>> "Skidmore, Donald C" <donald.c.skidmore@intel.com> writes:
> > >>>
> > >>>> My hang up is more related to: without the nob to enable it (off by
> > >>>> default) we are letting one VF dictate policy for all the other VFs
> > >>>> and the PF.  If one VF needs to be in promiscuous multicast so is
> > >>>> everyone else.  Their stacks now needs to deal with all the extra
> > >>>> multicast packets.  As you point out this might not be a direct
> > >>>> concern for isolation in that the VM could have 'chosen' to join
> > >>>> any Multicast group and seen this traffic.  My concern over
> > >>>> isolation is one VF has chosen that all the other VM now have to
> > >>>> see this multicast traffic.
> > >>> Apologies if this question is stupid, but I just have to ask about
> > >>> stuff I don't understand...
> > >>>
> > >>> Looking at the proposed implementation, the promiscous multicast
> > >>> flag seems to be a per-VF flag:
> > >>>
> > >>> +int ixgbe_ndo_set_vf_mc_promisc(struct net_device *netdev, int vf,
> > >>> +bool
> > >>> +setting) {
> > >>> +	struct ixgbe_adapter *adapter = netdev_priv(netdev);
> > >>> +	struct ixgbe_hw *hw = &adapter->hw;
> > >>> +	u32 vmolr;
> > >>> +
> > >>> +	if (vf >= adapter->num_vfs)
> > >>> +		return -EINVAL;
> > >>> +
> > >>> +	adapter->vfinfo[vf].mc_promisc_enabled = setting;
> > >>> +
> > >>> +	vmolr = IXGBE_READ_REG(hw, IXGBE_VMOLR(vf));
> > >>> +	if (setting) {
> > >>> +		e_info(drv, "VF %u: enabling multicast promiscuous\n", vf);
> > >>> +		vmolr |= IXGBE_VMOLR_MPE;
> > >>> +	} else {
> > >>> +		e_info(drv, "VF %u: disabling multicast promiscuous\n", vf);
> > >>> +		vmolr &= ~IXGBE_VMOLR_MPE;
> > >>> +	}
> > >>> +
> > >>> +	IXGBE_WRITE_REG(hw, IXGBE_VMOLR(vf), vmolr);
> > >>> +
> > >>> +	return 0;
> > >>> +}
> > >>> +
> > >>>
> > >>> I haven't read the data sheet, but I took a quick look at the
> > >>> excellent high level driver docs:
> > >>> http://www.intel.com/content/dam/doc/design-guide/82599-sr-iov-
> > drive
> > >>> r-
> > >>> companion-guide.pdf
> > >>>
> > >>> It mentions "Multicast Promiscuous Enable" in its "Thoughts for
> > >>> Customization" section:
> > >>>
> > >>>  7.1 Multicast Promiscuous Enable
> > >>>
> > >>>  The controller has provisions to allow each VF to be put into
> > >>> Multicast Promiscuous mode.  The Intel reference driver does not
> > >>> configure this option .
> > >>>
> > >>>  The capability can be enabled/disabled by manipulating the MPE
> > >>> field  (bit
> > >>> 28) of the PF VF L2 Control Register (PFVML2FLT – 0x0F000)
> > >>>
> > >>> and showing a section from the data sheet describing the "PF VM L2
> > >>> Control Register - PFVML2FLT[n]  (0x0F000 + 4 * n, n=0...63; RW)"
> > >>>
> > >>> To me it looks like enabling Promiscuos Multicast for a VF won't
> > >>> affect any other VF at all.  Is this really not the case?
> > >>>
> > >>>
> > >>>
> > >>> Bjørn
> > >> Clearly not a dumb question at all and I'm glad you mentioned that.
> > >> :)  I was going off the assumption, been awhile since I read the
> > >> patch, that the patch was using FCTRL.MPE or MANC.MCST_PASS_L2
> > which would turn multicast promiscuous on for everyone.  Since the patch is
> > using PFVML2FLT.MPE this lessens my concern over effect on the entire
> > system.
> > > I believe the patches for this VF multicast promiscuous mode is per VF.
> > >
> > >> That said I still would prefer having a way to override this behavior on the
> > PF, although I admit my argument is weaker.
> > >> I'm still concerned about a VF changing the behavior of the PF
> > >> without any way to prevent it.  This might be one part philosophical
> > >> (PF sets policy not the VF) but this still could have a noticeable
> > >> effect on the overall system.  If any other VFs (or the PF) are
> > >> receiving MC packets these will have to be replicated which will be a
> > >> performance hit.  When we use the MC hash this is limited vs. when
> > anyone is in MC promiscuous every MC packet used by another pool would
> > be replicated.  I could imagine in some environments (i.e. public clouds)
> > where you don't trust what is running in your VM you might what to block
> > this from happening.
> > > I understand your request and I'm thinking to submit the patches
> > >   1) Add new mbox API between ixgbe/ixgbevf to turn MC promiscuous on,
> > >      and enables it when ixgbevf needs over 30 MC addresses.
> > >   2) Add a policy knob to prevent enabling it from the PF.
> > >
> > > Does it seem okay?
> >
> > I would advise that if such a knob is added it should be set to disabled by
> > default.  The main problem with supporting that many multicast addresses
> > per VF is that you run the risk for flooding the PCIe interface on the network
> > adapter if too many adapters were to go into such a mode.
> >
> 
> This was my understanding as well.  Someone would have to "choose" to allow VM to enter this mode, meaning off by default.

I see you'd like to keep it off unless the administrator wants to configure.

> 
> > > BTW, I'm bit worried about to use ndo interface for 2) because adding
> > > a new hook makes core code complicated.
> > > Is it really reasonable to do it with ndo?
> > > I haven't find any other suitable method to do it, right now. And
> > > using ndo VF hook looks standard way to control VF functionality.
> > > Then, I think it's the best way to implement this policy in ndo hook.
> >
> > The ndo is probably the only way to go on this.  It is how past controls for the
> > VF network functionality have been implemented.
> >
> > >> In some ways it is almost the mirror image of the issue you brought up:
> > >>
> > >> Adding a new hook for this seems over-complicated to me.  And it
> > >> still doesn't solve the real problems that
> > >>  a) the user has to know about this limit, and
> > >>  b) manually configure the feature
> > >>
> > >> My reverse argument might be that if this happens automatically.  It
> > >> might take the VM provider a long time to realize performance has
> > >> taken a hit because some VM asked to join 31 multicast groups and
> > entered MC promiscuous.  Then only to find that they have no way to block
> > such behavior.
> > >>
> > >> Maybe I wouldn't as concerned if the patch author could provide some
> > >> performance results to show this won't have as a negative effect as I'm
> > afraid it might?
> > > Hm, what kind of test case would you like to have?
> > > The user A who has 30 MC addresses vs the user B who has 31 MC
> > > addresses, which means that MC promiscuous mode, and coming MC
> > packets the user doesn't expect?
> > >
> > > thanks,
> > > Hiroshi
> >
> > The question I would have is how many of these interfaces do you expect to
> > have supporting the expanded multicast mode?  As it currently stands

Right now, we're thinking 1 or 2 VFs per PF will be enabled this feature
in our use case. Another representation, 1 or 2 VMs will be used VF device on
one server, each VM has 2 VFs usually.

> > multicast traffic has the potential to flood the adapter, greatly reduce the
> > overall throughput, and add extra workload to the PF and all VFs.
> > For example if several VFs enable this feature, and then someone on the
> > network sends a stream of multicast traffic what happens to the CPU load for
> > the host system?

I'm not sure why all VFs are affected.
I can imagine that PF and VFs which are enabled this feature could receive
flooding packets, but other VFs never receive such packets.

> >
> > Also how many addresses beyond 30 is it you require?  An alternative to
> > adding multicast promiscuous might be to consider extending the mailbox
> > API to support sending more than 30 addresses via something such as a
> > multi-part multicast configuration message.  The fm10k driver already has
> > logic similar to this as it adds addresses 1 at a time though a separate Switch
> > interface API between the PF and the Switch.  You might be able to reuse
> > some of that code to reach beyond the 30 address limit.
> 
> I think this would be a much safer approach and probably scale well for small sets.  However I don't think it would work
> for Hiroshi's use case.  If I remember correctly he wanted 1000's of MC groups per VM.   I imagine there would be considerable
> overhead even loading up our 4K hash table 1 address at a time, likewise with that many address wouldn't this just be
> pretty much the same as being in multicast promiscuous.  Still this might be an interesting approach to support those
> needing only a few MC groups over the limit.

We would like to use 2000 IPs usually, and would extend to 4k which comes from VLAN limit.
IIRC, mbox API has only 32 words, it doesn't scale, I think.
And there is a single hash table per PF, not per VF. I guess if we set 4k hash entries
from a specific VF, every VF will receive all MC packets.

> 
> Alex's point is worth reiterating, this will effect performance.  Have you tested this out under load and still see the
> results you can live with?

I don't have any number in MC load.
In general case, we have enough performance in VM, almost wire rate in our scenario.

thanks,
Hiroshi

> 
> -Don Skidmore <donald.c.skidmore@intel.com>
> 
> >
> > - Alex
------------------------------------------------------------------------------
Dive into the World of Parallel Programming. The Go Parallel Website,
sponsored by Intel and developed in partnership with Slashdot Media, is your
hub for all things parallel software development, from weekly thought
leadership blogs to news, videos, case studies, tutorials and more. Take a
look and join the conversation now. http://goparallel.sourceforge.net/
_______________________________________________
E1000-devel mailing list
E1000-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/e1000-devel
To learn more about Intel&#174; Ethernet, visit http://communities.intel.com/community/wired

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [E1000-devel] [PATCH 1/2] if_link: Add VF multicast promiscuous mode control
  2015-01-27 12:53                                   ` Hiroshi Shimamoto
@ 2015-01-27 17:34                                     ` Alexander Duyck
  -1 siblings, 0 replies; 46+ messages in thread
From: Alexander Duyck @ 2015-01-27 17:34 UTC (permalink / raw)
  To: Hiroshi Shimamoto, Skidmore, Donald C, Bjørn Mork
  Cc: e1000-devel, netdev, Choi, Sy Jong, linux-kernel, David Laight,
	Hayato Momma

On 01/27/2015 04:53 AM, Hiroshi Shimamoto wrote:
>>> On 01/22/2015 04:32 PM, Hiroshi Shimamoto wrote:
>>>>> Subject: RE: [E1000-devel] [PATCH 1/2] if_link: Add VF multicast
>>>>> promiscuous mode control
>>>>>> "Skidmore, Donald C" <donald.c.skidmore@intel.com> writes:
>>>>>>
>>>>>>> My hang up is more related to: without the nob to enable it (off by
>>>>>>> default) we are letting one VF dictate policy for all the other VFs
>>>>>>> and the PF.  If one VF needs to be in promiscuous multicast so is
>>>>>>> everyone else.  Their stacks now needs to deal with all the extra
>>>>>>> multicast packets.  As you point out this might not be a direct
>>>>>>> concern for isolation in that the VM could have 'chosen' to join
>>>>>>> any Multicast group and seen this traffic.  My concern over
>>>>>>> isolation is one VF has chosen that all the other VM now have to
>>>>>>> see this multicast traffic.
>>>>>> Apologies if this question is stupid, but I just have to ask about
>>>>>> stuff I don't understand...
>>>>>>
>>>>>> Looking at the proposed implementation, the promiscous multicast
>>>>>> flag seems to be a per-VF flag:
>>>>>>
>>>>>> +int ixgbe_ndo_set_vf_mc_promisc(struct net_device *netdev, int vf,
>>>>>> +bool
>>>>>> +setting) {
>>>>>> +	struct ixgbe_adapter *adapter = netdev_priv(netdev);
>>>>>> +	struct ixgbe_hw *hw = &adapter->hw;
>>>>>> +	u32 vmolr;
>>>>>> +
>>>>>> +	if (vf >= adapter->num_vfs)
>>>>>> +		return -EINVAL;
>>>>>> +
>>>>>> +	adapter->vfinfo[vf].mc_promisc_enabled = setting;
>>>>>> +
>>>>>> +	vmolr = IXGBE_READ_REG(hw, IXGBE_VMOLR(vf));
>>>>>> +	if (setting) {
>>>>>> +		e_info(drv, "VF %u: enabling multicast promiscuous\n", vf);
>>>>>> +		vmolr |= IXGBE_VMOLR_MPE;
>>>>>> +	} else {
>>>>>> +		e_info(drv, "VF %u: disabling multicast promiscuous\n", vf);
>>>>>> +		vmolr &= ~IXGBE_VMOLR_MPE;
>>>>>> +	}
>>>>>> +
>>>>>> +	IXGBE_WRITE_REG(hw, IXGBE_VMOLR(vf), vmolr);
>>>>>> +
>>>>>> +	return 0;
>>>>>> +}
>>>>>> +
>>>>>>
>>>>>> I haven't read the data sheet, but I took a quick look at the
>>>>>> excellent high level driver docs:
>>>>>> http://www.intel.com/content/dam/doc/design-guide/82599-sr-iov-
>>> drive
>>>>>> r-
>>>>>> companion-guide.pdf
>>>>>>
>>>>>> It mentions "Multicast Promiscuous Enable" in its "Thoughts for
>>>>>> Customization" section:
>>>>>>
>>>>>>  7.1 Multicast Promiscuous Enable
>>>>>>
>>>>>>  The controller has provisions to allow each VF to be put into
>>>>>> Multicast Promiscuous mode.  The Intel reference driver does not
>>>>>> configure this option .
>>>>>>
>>>>>>  The capability can be enabled/disabled by manipulating the MPE
>>>>>> field  (bit
>>>>>> 28) of the PF VF L2 Control Register (PFVML2FLT – 0x0F000)
>>>>>>
>>>>>> and showing a section from the data sheet describing the "PF VM L2
>>>>>> Control Register - PFVML2FLT[n]  (0x0F000 + 4 * n, n=0...63; RW)"
>>>>>>
>>>>>> To me it looks like enabling Promiscuos Multicast for a VF won't
>>>>>> affect any other VF at all.  Is this really not the case?
>>>>>>
>>>>>>
>>>>>>
>>>>>> Bjørn
>>>>> Clearly not a dumb question at all and I'm glad you mentioned that.
>>>>> :)  I was going off the assumption, been awhile since I read the
>>>>> patch, that the patch was using FCTRL.MPE or MANC.MCST_PASS_L2
>>> which would turn multicast promiscuous on for everyone.  Since the patch is
>>> using PFVML2FLT.MPE this lessens my concern over effect on the entire
>>> system.
>>>> I believe the patches for this VF multicast promiscuous mode is per VF.
>>>>
>>>>> That said I still would prefer having a way to override this behavior on the
>>> PF, although I admit my argument is weaker.
>>>>> I'm still concerned about a VF changing the behavior of the PF
>>>>> without any way to prevent it.  This might be one part philosophical
>>>>> (PF sets policy not the VF) but this still could have a noticeable
>>>>> effect on the overall system.  If any other VFs (or the PF) are
>>>>> receiving MC packets these will have to be replicated which will be a
>>>>> performance hit.  When we use the MC hash this is limited vs. when
>>> anyone is in MC promiscuous every MC packet used by another pool would
>>> be replicated.  I could imagine in some environments (i.e. public clouds)
>>> where you don't trust what is running in your VM you might what to block
>>> this from happening.
>>>> I understand your request and I'm thinking to submit the patches
>>>>   1) Add new mbox API between ixgbe/ixgbevf to turn MC promiscuous on,
>>>>      and enables it when ixgbevf needs over 30 MC addresses.
>>>>   2) Add a policy knob to prevent enabling it from the PF.
>>>>
>>>> Does it seem okay?
>>> I would advise that if such a knob is added it should be set to disabled by
>>> default.  The main problem with supporting that many multicast addresses
>>> per VF is that you run the risk for flooding the PCIe interface on the network
>>> adapter if too many adapters were to go into such a mode.
>>>
>> This was my understanding as well.  Someone would have to "choose" to allow VM to enter this mode, meaning off by default.
> I see you'd like to keep it off unless the administrator wants to configure.

Correct.  This is a somewhat risky configuration so it is best to
control who has access to it.

One additional thought is that you may want to make it so that the VF
has some control over it from its side as well.  In the case of the igb
driver I believe there is already a control for setting the multicast
promiscuous in the event of exceeding 30 multicast addresses.  You may
want to look at that for ideas on how to have a mailbox message work in
conjunction with your control to allow enabling/disabling the multicast
promiscuous mode.

>>>> BTW, I'm bit worried about to use ndo interface for 2) because adding
>>>> a new hook makes core code complicated.
>>>> Is it really reasonable to do it with ndo?
>>>> I haven't find any other suitable method to do it, right now. And
>>>> using ndo VF hook looks standard way to control VF functionality.
>>>> Then, I think it's the best way to implement this policy in ndo hook.
>>> The ndo is probably the only way to go on this.  It is how past controls for the
>>> VF network functionality have been implemented.
>>>
>>>>> In some ways it is almost the mirror image of the issue you brought up:
>>>>>
>>>>> Adding a new hook for this seems over-complicated to me.  And it
>>>>> still doesn't solve the real problems that
>>>>>  a) the user has to know about this limit, and
>>>>>  b) manually configure the feature
>>>>>
>>>>> My reverse argument might be that if this happens automatically.  It
>>>>> might take the VM provider a long time to realize performance has
>>>>> taken a hit because some VM asked to join 31 multicast groups and
>>> entered MC promiscuous.  Then only to find that they have no way to block
>>> such behavior.
>>>>> Maybe I wouldn't as concerned if the patch author could provide some
>>>>> performance results to show this won't have as a negative effect as I'm
>>> afraid it might?
>>>> Hm, what kind of test case would you like to have?
>>>> The user A who has 30 MC addresses vs the user B who has 31 MC
>>>> addresses, which means that MC promiscuous mode, and coming MC
>>> packets the user doesn't expect?
>>>> thanks,
>>>> Hiroshi
>>> The question I would have is how many of these interfaces do you expect to
>>> have supporting the expanded multicast mode?  As it currently stands
> Right now, we're thinking 1 or 2 VFs per PF will be enabled this feature
> in our use case. Another representation, 1 or 2 VMs will be used VF device on
> one server, each VM has 2 VFs usually.

This seems reasonable.  If it is only one or two VFs then you probably
would be better of using the promiscuous mode.

>>> multicast traffic has the potential to flood the adapter, greatly reduce the
>>> overall throughput, and add extra workload to the PF and all VFs.
>>> For example if several VFs enable this feature, and then someone on the
>>> network sends a stream of multicast traffic what happens to the CPU load for
>>> the host system?
> I'm not sure why all VFs are affected.
> I can imagine that PF and VFs which are enabled this feature could receive
> flooding packets, but other VFs never receive such packets.

The other VFs would't be receiving the traffic, they would just have
reduced access to traffic because for each VF that can receive a packet
it multiplies the PCIe bandwidth.  So if you have 4 VFs in multicast
promiscuous mode then that means for each multicast frame receive the
PCIe bus will have to pass the frame 4 times in order to provide a copy
to each VF.   When you consider that the 82599 can support 64 VFs this
replication can have a huge impact on the PCIe bus bandwidth.

>>> Also how many addresses beyond 30 is it you require?  An alternative to
>>> adding multicast promiscuous might be to consider extending the mailbox
>>> API to support sending more than 30 addresses via something such as a
>>> multi-part multicast configuration message.  The fm10k driver already has
>>> logic similar to this as it adds addresses 1 at a time though a separate Switch
>>> interface API between the PF and the Switch.  You might be able to reuse
>>> some of that code to reach beyond the 30 address limit.
>> I think this would be a much safer approach and probably scale well for small sets.  However I don't think it would work
>> for Hiroshi's use case.  If I remember correctly he wanted 1000's of MC groups per VM.   I imagine there would be considerable
>> overhead even loading up our 4K hash table 1 address at a time, likewise with that many address wouldn't this just be
>> pretty much the same as being in multicast promiscuous.  Still this might be an interesting approach to support those
>> needing only a few MC groups over the limit.
> We would like to use 2000 IPs usually, and would extend to 4k which comes from VLAN limit.
> IIRC, mbox API has only 32 words, it doesn't scale, I think.
> And there is a single hash table per PF, not per VF. I guess if we set 4k hash entries
> from a specific VF, every VF will receive all MC packets.

This is true.  So if you only have one or two VFs that you expect to
need that many entries it might be preferable to go the multicast
promiscuous route to avoid excess replication.

>> Alex's point is worth reiterating, this will effect performance.  Have you tested this out under load and still see the
>> results you can live with?
> I don't have any number in MC load.
> In general case, we have enough performance in VM, almost wire rate in our scenario.
>
> thanks,
> Hiroshi
>

I'd say your best bet for this is to make it both an NDO (defaulted to
disabled) and a mailbox request.  If both are enabling it then enable
the feature.  That way you can avoid possibly sending unexpected packets
to a legacy VF, or allowing a VF to enable it on a system that hasn't
been configured for it.

- Alex

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [E1000-devel] [PATCH 1/2] if_link: Add VF multicast promiscuous mode control
@ 2015-01-27 17:34                                     ` Alexander Duyck
  0 siblings, 0 replies; 46+ messages in thread
From: Alexander Duyck @ 2015-01-27 17:34 UTC (permalink / raw)
  To: Hiroshi Shimamoto, Skidmore, Donald C, Bjørn Mork
  Cc: e1000-devel, netdev, Choi, Sy Jong, linux-kernel, David Laight,
	Hayato Momma

On 01/27/2015 04:53 AM, Hiroshi Shimamoto wrote:
>>> On 01/22/2015 04:32 PM, Hiroshi Shimamoto wrote:
>>>>> Subject: RE: [E1000-devel] [PATCH 1/2] if_link: Add VF multicast
>>>>> promiscuous mode control
>>>>>> "Skidmore, Donald C" <donald.c.skidmore@intel.com> writes:
>>>>>>
>>>>>>> My hang up is more related to: without the nob to enable it (off by
>>>>>>> default) we are letting one VF dictate policy for all the other VFs
>>>>>>> and the PF.  If one VF needs to be in promiscuous multicast so is
>>>>>>> everyone else.  Their stacks now needs to deal with all the extra
>>>>>>> multicast packets.  As you point out this might not be a direct
>>>>>>> concern for isolation in that the VM could have 'chosen' to join
>>>>>>> any Multicast group and seen this traffic.  My concern over
>>>>>>> isolation is one VF has chosen that all the other VM now have to
>>>>>>> see this multicast traffic.
>>>>>> Apologies if this question is stupid, but I just have to ask about
>>>>>> stuff I don't understand...
>>>>>>
>>>>>> Looking at the proposed implementation, the promiscous multicast
>>>>>> flag seems to be a per-VF flag:
>>>>>>
>>>>>> +int ixgbe_ndo_set_vf_mc_promisc(struct net_device *netdev, int vf,
>>>>>> +bool
>>>>>> +setting) {
>>>>>> +	struct ixgbe_adapter *adapter = netdev_priv(netdev);
>>>>>> +	struct ixgbe_hw *hw = &adapter->hw;
>>>>>> +	u32 vmolr;
>>>>>> +
>>>>>> +	if (vf >= adapter->num_vfs)
>>>>>> +		return -EINVAL;
>>>>>> +
>>>>>> +	adapter->vfinfo[vf].mc_promisc_enabled = setting;
>>>>>> +
>>>>>> +	vmolr = IXGBE_READ_REG(hw, IXGBE_VMOLR(vf));
>>>>>> +	if (setting) {
>>>>>> +		e_info(drv, "VF %u: enabling multicast promiscuous\n", vf);
>>>>>> +		vmolr |= IXGBE_VMOLR_MPE;
>>>>>> +	} else {
>>>>>> +		e_info(drv, "VF %u: disabling multicast promiscuous\n", vf);
>>>>>> +		vmolr &= ~IXGBE_VMOLR_MPE;
>>>>>> +	}
>>>>>> +
>>>>>> +	IXGBE_WRITE_REG(hw, IXGBE_VMOLR(vf), vmolr);
>>>>>> +
>>>>>> +	return 0;
>>>>>> +}
>>>>>> +
>>>>>>
>>>>>> I haven't read the data sheet, but I took a quick look at the
>>>>>> excellent high level driver docs:
>>>>>> http://www.intel.com/content/dam/doc/design-guide/82599-sr-iov-
>>> drive
>>>>>> r-
>>>>>> companion-guide.pdf
>>>>>>
>>>>>> It mentions "Multicast Promiscuous Enable" in its "Thoughts for
>>>>>> Customization" section:
>>>>>>
>>>>>>  7.1 Multicast Promiscuous Enable
>>>>>>
>>>>>>  The controller has provisions to allow each VF to be put into
>>>>>> Multicast Promiscuous mode.  The Intel reference driver does not
>>>>>> configure this option .
>>>>>>
>>>>>>  The capability can be enabled/disabled by manipulating the MPE
>>>>>> field  (bit
>>>>>> 28) of the PF VF L2 Control Register (PFVML2FLT – 0x0F000)
>>>>>>
>>>>>> and showing a section from the data sheet describing the "PF VM L2
>>>>>> Control Register - PFVML2FLT[n]  (0x0F000 + 4 * n, n=0...63; RW)"
>>>>>>
>>>>>> To me it looks like enabling Promiscuos Multicast for a VF won't
>>>>>> affect any other VF at all.  Is this really not the case?
>>>>>>
>>>>>>
>>>>>>
>>>>>> Bjørn
>>>>> Clearly not a dumb question at all and I'm glad you mentioned that.
>>>>> :)  I was going off the assumption, been awhile since I read the
>>>>> patch, that the patch was using FCTRL.MPE or MANC.MCST_PASS_L2
>>> which would turn multicast promiscuous on for everyone.  Since the patch is
>>> using PFVML2FLT.MPE this lessens my concern over effect on the entire
>>> system.
>>>> I believe the patches for this VF multicast promiscuous mode is per VF.
>>>>
>>>>> That said I still would prefer having a way to override this behavior on the
>>> PF, although I admit my argument is weaker.
>>>>> I'm still concerned about a VF changing the behavior of the PF
>>>>> without any way to prevent it.  This might be one part philosophical
>>>>> (PF sets policy not the VF) but this still could have a noticeable
>>>>> effect on the overall system.  If any other VFs (or the PF) are
>>>>> receiving MC packets these will have to be replicated which will be a
>>>>> performance hit.  When we use the MC hash this is limited vs. when
>>> anyone is in MC promiscuous every MC packet used by another pool would
>>> be replicated.  I could imagine in some environments (i.e. public clouds)
>>> where you don't trust what is running in your VM you might what to block
>>> this from happening.
>>>> I understand your request and I'm thinking to submit the patches
>>>>   1) Add new mbox API between ixgbe/ixgbevf to turn MC promiscuous on,
>>>>      and enables it when ixgbevf needs over 30 MC addresses.
>>>>   2) Add a policy knob to prevent enabling it from the PF.
>>>>
>>>> Does it seem okay?
>>> I would advise that if such a knob is added it should be set to disabled by
>>> default.  The main problem with supporting that many multicast addresses
>>> per VF is that you run the risk for flooding the PCIe interface on the network
>>> adapter if too many adapters were to go into such a mode.
>>>
>> This was my understanding as well.  Someone would have to "choose" to allow VM to enter this mode, meaning off by default.
> I see you'd like to keep it off unless the administrator wants to configure.

Correct.  This is a somewhat risky configuration so it is best to
control who has access to it.

One additional thought is that you may want to make it so that the VF
has some control over it from its side as well.  In the case of the igb
driver I believe there is already a control for setting the multicast
promiscuous in the event of exceeding 30 multicast addresses.  You may
want to look at that for ideas on how to have a mailbox message work in
conjunction with your control to allow enabling/disabling the multicast
promiscuous mode.

>>>> BTW, I'm bit worried about to use ndo interface for 2) because adding
>>>> a new hook makes core code complicated.
>>>> Is it really reasonable to do it with ndo?
>>>> I haven't find any other suitable method to do it, right now. And
>>>> using ndo VF hook looks standard way to control VF functionality.
>>>> Then, I think it's the best way to implement this policy in ndo hook.
>>> The ndo is probably the only way to go on this.  It is how past controls for the
>>> VF network functionality have been implemented.
>>>
>>>>> In some ways it is almost the mirror image of the issue you brought up:
>>>>>
>>>>> Adding a new hook for this seems over-complicated to me.  And it
>>>>> still doesn't solve the real problems that
>>>>>  a) the user has to know about this limit, and
>>>>>  b) manually configure the feature
>>>>>
>>>>> My reverse argument might be that if this happens automatically.  It
>>>>> might take the VM provider a long time to realize performance has
>>>>> taken a hit because some VM asked to join 31 multicast groups and
>>> entered MC promiscuous.  Then only to find that they have no way to block
>>> such behavior.
>>>>> Maybe I wouldn't as concerned if the patch author could provide some
>>>>> performance results to show this won't have as a negative effect as I'm
>>> afraid it might?
>>>> Hm, what kind of test case would you like to have?
>>>> The user A who has 30 MC addresses vs the user B who has 31 MC
>>>> addresses, which means that MC promiscuous mode, and coming MC
>>> packets the user doesn't expect?
>>>> thanks,
>>>> Hiroshi
>>> The question I would have is how many of these interfaces do you expect to
>>> have supporting the expanded multicast mode?  As it currently stands
> Right now, we're thinking 1 or 2 VFs per PF will be enabled this feature
> in our use case. Another representation, 1 or 2 VMs will be used VF device on
> one server, each VM has 2 VFs usually.

This seems reasonable.  If it is only one or two VFs then you probably
would be better of using the promiscuous mode.

>>> multicast traffic has the potential to flood the adapter, greatly reduce the
>>> overall throughput, and add extra workload to the PF and all VFs.
>>> For example if several VFs enable this feature, and then someone on the
>>> network sends a stream of multicast traffic what happens to the CPU load for
>>> the host system?
> I'm not sure why all VFs are affected.
> I can imagine that PF and VFs which are enabled this feature could receive
> flooding packets, but other VFs never receive such packets.

The other VFs would't be receiving the traffic, they would just have
reduced access to traffic because for each VF that can receive a packet
it multiplies the PCIe bandwidth.  So if you have 4 VFs in multicast
promiscuous mode then that means for each multicast frame receive the
PCIe bus will have to pass the frame 4 times in order to provide a copy
to each VF.   When you consider that the 82599 can support 64 VFs this
replication can have a huge impact on the PCIe bus bandwidth.

>>> Also how many addresses beyond 30 is it you require?  An alternative to
>>> adding multicast promiscuous might be to consider extending the mailbox
>>> API to support sending more than 30 addresses via something such as a
>>> multi-part multicast configuration message.  The fm10k driver already has
>>> logic similar to this as it adds addresses 1 at a time though a separate Switch
>>> interface API between the PF and the Switch.  You might be able to reuse
>>> some of that code to reach beyond the 30 address limit.
>> I think this would be a much safer approach and probably scale well for small sets.  However I don't think it would work
>> for Hiroshi's use case.  If I remember correctly he wanted 1000's of MC groups per VM.   I imagine there would be considerable
>> overhead even loading up our 4K hash table 1 address at a time, likewise with that many address wouldn't this just be
>> pretty much the same as being in multicast promiscuous.  Still this might be an interesting approach to support those
>> needing only a few MC groups over the limit.
> We would like to use 2000 IPs usually, and would extend to 4k which comes from VLAN limit.
> IIRC, mbox API has only 32 words, it doesn't scale, I think.
> And there is a single hash table per PF, not per VF. I guess if we set 4k hash entries
> from a specific VF, every VF will receive all MC packets.

This is true.  So if you only have one or two VFs that you expect to
need that many entries it might be preferable to go the multicast
promiscuous route to avoid excess replication.

>> Alex's point is worth reiterating, this will effect performance.  Have you tested this out under load and still see the
>> results you can live with?
> I don't have any number in MC load.
> In general case, we have enough performance in VM, almost wire rate in our scenario.
>
> thanks,
> Hiroshi
>

I'd say your best bet for this is to make it both an NDO (defaulted to
disabled) and a mailbox request.  If both are enabling it then enable
the feature.  That way you can avoid possibly sending unexpected packets
to a legacy VF, or allowing a VF to enable it on a system that hasn't
been configured for it.

- Alex

^ permalink raw reply	[flat|nested] 46+ messages in thread

end of thread, other threads:[~2015-01-27 17:34 UTC | newest]

Thread overview: 46+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-01-20 10:50 [PATCH 1/2] if_link: Add VF multicast promiscuous mode control Hiroshi Shimamoto
2015-01-20 10:50 ` Hiroshi Shimamoto
2015-01-20 11:42 ` Bjørn Mork
2015-01-20 11:42   ` Bjørn Mork
2015-01-20 23:40   ` Hiroshi Shimamoto
2015-01-20 23:40     ` Hiroshi Shimamoto
2015-01-21  0:26     ` [E1000-devel] " Skidmore, Donald C
2015-01-21  0:26       ` Skidmore, Donald C
2015-01-21  1:07       ` Hiroshi Shimamoto
2015-01-21  1:07         ` Hiroshi Shimamoto
2015-01-21  1:30         ` [E1000-devel] " Skidmore, Donald C
2015-01-21  1:30           ` Skidmore, Donald C
2015-01-21 11:38           ` Hiroshi Shimamoto
2015-01-21 11:38             ` Hiroshi Shimamoto
2015-01-21 11:56             ` [E1000-devel] " David Laight
2015-01-21 11:56               ` David Laight
2015-01-21 12:18               ` Hiroshi Shimamoto
2015-01-21 12:18                 ` Hiroshi Shimamoto
2015-01-21 20:27                 ` [E1000-devel] " Skidmore, Donald C
2015-01-21 20:27                   ` Skidmore, Donald C
2015-01-22  9:50                   ` [E1000-devel] " David Laight
2015-01-22  9:50                     ` David Laight
2015-01-22 10:52                     ` Jeff Kirsher
2015-01-22 10:52                       ` Jeff Kirsher
2015-01-22 19:00                     ` [E1000-devel] " Skidmore, Donald C
2015-01-22 19:00                       ` Skidmore, Donald C
2015-01-22 19:31                       ` Bjørn Mork
2015-01-22 19:31                         ` Bjørn Mork
2015-01-22 21:34                         ` Skidmore, Donald C
2015-01-22 21:34                           ` Skidmore, Donald C
2015-01-23  0:32                           ` Hiroshi Shimamoto
2015-01-23  0:32                             ` Hiroshi Shimamoto
2015-01-23  1:21                             ` [E1000-devel] " Skidmore, Donald C
2015-01-23  1:21                               ` Skidmore, Donald C
2015-01-23  7:18                             ` Alexander Duyck
2015-01-23  7:18                               ` Alexander Duyck
2015-01-23 18:24                               ` Skidmore, Donald C
2015-01-23 18:24                                 ` Skidmore, Donald C
2015-01-27 12:53                                 ` Hiroshi Shimamoto
2015-01-27 12:53                                   ` Hiroshi Shimamoto
2015-01-27 17:34                                   ` [E1000-devel] " Alexander Duyck
2015-01-27 17:34                                     ` Alexander Duyck
2015-01-21  9:26     ` Bjørn Mork
2015-01-21  9:26       ` Bjørn Mork
2015-01-20 11:53 ` David Laight
2015-01-20 11:53   ` David Laight

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.