All of lore.kernel.org
 help / color / mirror / Atom feed
* [Intel-wired-lan] [next PATCH 1/5] i40e: Introduce devlink interface.
@ 2016-08-04 16:49 Sridhar Samudrala
  2016-08-04 16:49 ` [Intel-wired-lan] [next PATCH 2/5] i40e: Introduce VF representor/control netdevs Sridhar Samudrala
                   ` (6 more replies)
  0 siblings, 7 replies; 18+ messages in thread
From: Sridhar Samudrala @ 2016-08-04 16:49 UTC (permalink / raw)
  To: intel-wired-lan

Add initial devlink support to set/get the mode of SRIOV switch.
This patch allows the mode to be set to either 'legacy' or 'switchdev', but
doesn't implement any functionality to create vf representors in switchdev
mode.

With smode support in iproute2 'devlink' utility, switch mode can be set
and get via following commands.

    # devlink dev smode pci/0000:05:00.0
    mode: legacy
    # devlink dev set pci/0000:05:00.0 smode switchdev
    # devlink dev smode pci/0000:05:00.0
    mode: switchdev

Signed-off-by: Sridhar Samudrala <sridhar.samudrala@intel.com>
---
 drivers/net/ethernet/intel/Kconfig          |  1 +
 drivers/net/ethernet/intel/i40e/i40e.h      |  3 ++
 drivers/net/ethernet/intel/i40e/i40e_main.c | 84 ++++++++++++++++++++++++++---
 3 files changed, 80 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/intel/Kconfig b/drivers/net/ethernet/intel/Kconfig
index c0e1743..2ede229 100644
--- a/drivers/net/ethernet/intel/Kconfig
+++ b/drivers/net/ethernet/intel/Kconfig
@@ -215,6 +215,7 @@ config I40E
 	tristate "Intel(R) Ethernet Controller XL710 Family support"
 	select PTP_1588_CLOCK
 	depends on PCI
+	depends on MAY_USE_DEVLINK
 	---help---
 	  This driver supports Intel(R) Ethernet Controller XL710 Family of
 	  devices.  For more information on how to identify your adapter, go
diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h
index 2a88291..724bd82 100644
--- a/drivers/net/ethernet/intel/i40e/i40e.h
+++ b/drivers/net/ethernet/intel/i40e/i40e.h
@@ -53,6 +53,8 @@
 #include <linux/clocksource.h>
 #include <linux/net_tstamp.h>
 #include <linux/ptp_clock_kernel.h>
+#include <net/devlink.h>
+
 #include "i40e_type.h"
 #include "i40e_prototype.h"
 #ifdef I40E_FCOE
@@ -446,6 +448,7 @@ struct i40e_pf {
 	u32 ioremap_len;
 	u32 fd_inv;
 	u16 phy_led_val;
+	enum devlink_eswitch_mode eswitch_mode;
 };
 
 enum i40e_filter_state {
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 339d99b..bb44f09 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -27,6 +27,7 @@
 #include <linux/etherdevice.h>
 #include <linux/of_net.h>
 #include <linux/pci.h>
+#include <net/devlink.h>
 
 /* Local includes */
 #include "i40e.h"
@@ -10671,6 +10672,58 @@ static void i40e_get_platform_mac_addr(struct pci_dev *pdev, struct i40e_pf *pf)
 }
 
 /**
+ * i40e_devlink_eswitch_mode_get
+ *
+ * @devlink: pointer to devlink struct
+ * @mode: sr-iov switch mode pointer
+ *
+ * Returns the switch mode of the associated PF in the @mode pointer.
+ */
+static int i40e_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode)
+{
+	struct i40e_pf *pf = devlink_priv(devlink);
+
+	*mode = pf->eswitch_mode;
+
+	return 0;
+}
+
+/**
+ * i40e_devlink_eswitch_mode_set
+ *
+ * @devlink: pointer to devlink struct
+ * @mode: sr-iov switch mode
+ *
+ * Set the switch mode of the associated PF.
+ * Returns 0 on success and -EOPNOTSUPP on error.
+ */
+static int i40e_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode)
+{
+	struct i40e_pf *pf = devlink_priv(devlink);
+	int err = 0;
+
+	if (mode == pf->eswitch_mode)
+		goto done;
+
+	switch (mode) {
+	case DEVLINK_ESWITCH_MODE_LEGACY:
+	case DEVLINK_ESWITCH_MODE_SWITCHDEV:
+		pf->eswitch_mode = mode;
+		break;
+	default:
+		err = -EOPNOTSUPP;
+	}
+
+done:
+	return err;
+}
+
+static const struct devlink_ops i40e_devlink_ops = {
+	.eswitch_mode_get = i40e_devlink_eswitch_mode_get,
+	.eswitch_mode_set = i40e_devlink_eswitch_mode_set,
+};
+
+/**
  * i40e_probe - Device initialization routine
  * @pdev: PCI device information struct
  * @ent: entry in i40e_pci_tbl
@@ -10687,6 +10740,7 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	struct i40e_pf *pf;
 	struct i40e_hw *hw;
 	static u16 pfs_found;
+	struct devlink *devlink;
 	u16 wol_nvm_bits;
 	u16 link_status;
 	int err;
@@ -10721,16 +10775,21 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	pci_enable_pcie_error_reporting(pdev);
 	pci_set_master(pdev);
 
+	devlink = devlink_alloc(&i40e_devlink_ops, sizeof(*pf));
+	if (!devlink) {
+		dev_err(&pdev->dev,
+			"devlink_alloc failed\n");
+		err = -ENOMEM;
+		goto err_devlink_alloc;
+	}
+
 	/* Now that we have a PCI connection, we need to do the
 	 * low level device setup.  This is primarily setting up
 	 * the Admin Queue structures and then querying for the
 	 * device's current profile information.
 	 */
-	pf = kzalloc(sizeof(*pf), GFP_KERNEL);
-	if (!pf) {
-		err = -ENOMEM;
-		goto err_pf_alloc;
-	}
+	pf = devlink_priv(devlink);
+
 	pf->next_vsi = 0;
 	pf->pdev = pdev;
 	set_bit(__I40E_DOWN, &pf->state);
@@ -11047,6 +11106,11 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 		}
 	}
 
+	pf->eswitch_mode = DEVLINK_ESWITCH_MODE_LEGACY;
+	err = devlink_register(devlink, &pdev->dev);
+	if (err)
+		goto err_devlink_register;
+
 #ifdef CONFIG_PCI_IOV
 	/* prep for VF support */
 	if ((pf->flags & I40E_FLAG_SRIOV_ENABLED) &&
@@ -11188,6 +11252,8 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	return 0;
 
 	/* Unwind what we've done if something failed in the setup */
+err_devlink_register:
+	i40e_stop_misc_vector(pf);
 err_vsis:
 	set_bit(__I40E_DOWN, &pf->state);
 	i40e_clear_interrupt_scheme(pf);
@@ -11205,8 +11271,8 @@ err_adminq_setup:
 err_pf_reset:
 	iounmap(hw->hw_addr);
 err_ioremap:
-	kfree(pf);
-err_pf_alloc:
+	devlink_free(devlink);
+err_devlink_alloc:
 	pci_disable_pcie_error_reporting(pdev);
 	pci_release_selected_regions(pdev,
 				     pci_select_bars(pdev, IORESOURCE_MEM));
@@ -11229,9 +11295,11 @@ static void i40e_remove(struct pci_dev *pdev)
 {
 	struct i40e_pf *pf = pci_get_drvdata(pdev);
 	struct i40e_hw *hw = &pf->hw;
+	struct devlink *devlink = priv_to_devlink(pf);
 	i40e_status ret_code;
 	int i;
 
+	devlink_unregister(devlink);
 	i40e_dbg_pf_exit(pf);
 
 	i40e_ptp_stop(pf);
@@ -11319,7 +11387,7 @@ static void i40e_remove(struct pci_dev *pdev)
 	kfree(pf->vsi);
 
 	iounmap(hw->hw_addr);
-	kfree(pf);
+	devlink_free(devlink);
 	pci_release_selected_regions(pdev,
 				     pci_select_bars(pdev, IORESOURCE_MEM));
 
-- 
2.5.5


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [Intel-wired-lan] [next PATCH 2/5] i40e: Introduce VF representor/control netdevs.
  2016-08-04 16:49 [Intel-wired-lan] [next PATCH 1/5] i40e: Introduce devlink interface Sridhar Samudrala
@ 2016-08-04 16:49 ` Sridhar Samudrala
  2016-08-10  0:18   ` Nambiar, Amritha
  2016-08-04 16:49 ` [Intel-wired-lan] [next PATCH 3/5] i40e: Enable VF specific ethtool statistics via VF representor netdevs Sridhar Samudrala
                   ` (5 subsequent siblings)
  6 siblings, 1 reply; 18+ messages in thread
From: Sridhar Samudrala @ 2016-08-04 16:49 UTC (permalink / raw)
  To: intel-wired-lan

VF representor/control netdevs are created for each VF if the switch mode
is set to 'switchdev'. These netdevs can be used to control and configure
VFs from PFs namespace. They enable exposing VF statistics, configuring
ntuple filters, additional mac/vlans to VFs etc.

Sample script to create VF representors
    # devlink dev set pci/0000:05:00.0 smode switchdev
    # echo 2 > /sys/class/net/enp5s0f0/device/sriov_numvfs
    # ip l show
    297: enp5s0f0: <BROADCAST,MULTICAST> mtu 1500 qdisc noop portid 6805ca2e7268 state DOWN mode DEFAULT group default qlen 1000
    link/ether 68:05:ca:2e:72:68 brd ff:ff:ff:ff:ff:ff
    vf 0 MAC 00:00:00:00:00:00, spoof checking on, link-state auto, trust off
    vf 1 MAC 00:00:00:00:00:00, spoof checking on, link-state auto, trust off
    299: enp5s0f0-vf0: <BROADCAST,MULTICAST> mtu 1500 qdisc noop state DOWN mode DEFAULT group default qlen 1000
    link/ether 00:00:00:00:00:00 brd ff:ff:ff:ff:ff:ff
    300: enp5s0f0-vf1: <BROADCAST,MULTICAST> mtu 1500 qdisc noop state DOWN mode DEFAULT group default qlen 1000
    link/ether 00:00:00:00:00:00 brd ff:ff:ff:ff:ff:ff

Signed-off-by: Sridhar Samudrala <sridhar.samudrala@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e_main.c        | 13 +++-
 drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c | 90 ++++++++++++++++++++++
 drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h | 14 ++++
 3 files changed, 116 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index bb44f09..208da9f 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -10700,14 +10700,25 @@ static int i40e_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode)
 static int i40e_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode)
 {
 	struct i40e_pf *pf = devlink_priv(devlink);
-	int err = 0;
+	struct i40e_vf *vf;
+	int i, err = 0;
 
 	if (mode == pf->eswitch_mode)
 		goto done;
 
 	switch (mode) {
 	case DEVLINK_ESWITCH_MODE_LEGACY:
+		for (i = 0; i < pf->num_alloc_vfs; i++) {
+			vf = &(pf->vf[i]);
+			i40e_free_vf_netdev(vf);
+		}
+		pf->eswitch_mode = mode;
+		break;
 	case DEVLINK_ESWITCH_MODE_SWITCHDEV:
+		for (i = 0; i < pf->num_alloc_vfs; i++) {
+			vf = &(pf->vf[i]);
+			i40e_alloc_vf_netdev(vf, i);
+		}
 		pf->eswitch_mode = mode;
 		break;
 	default:
diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
index 4bd66d7..d41c37c 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
@@ -1000,6 +1000,90 @@ complete_reset:
 	clear_bit(__I40E_VF_DISABLE, &pf->state);
 }
 
+static int i40e_vf_netdev_open(struct net_device *dev)
+{
+	return 0;
+}
+
+static int i40e_vf_netdev_stop(struct net_device *dev)
+{
+	return 0;
+}
+
+static const struct net_device_ops i40e_vf_netdev_ops = {
+	.ndo_open       = i40e_vf_netdev_open,
+	.ndo_stop       = i40e_vf_netdev_stop,
+};
+
+/**
+ * i40e_alloc_vf_netdev
+ * @vf: pointer to the VF structure
+ * @vf_num: VF number
+ *
+ * Create VF representor/control netdev
+ **/
+int i40e_alloc_vf_netdev(struct i40e_vf *vf, u16 vf_num)
+{
+	struct net_device *netdev;
+	char netdev_name[IFNAMSIZ];
+	struct i40e_vf_netdev_priv *priv;
+	struct i40e_pf *pf = vf->pf;
+	struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi];
+	int err;
+
+	snprintf(netdev_name, IFNAMSIZ, "%s-vf%d", vsi->netdev->name, vf_num);
+	netdev = alloc_netdev(sizeof(struct i40e_vf_netdev_priv), netdev_name,
+			      NET_NAME_UNKNOWN, ether_setup);
+	if (!netdev) {
+		dev_err(&pf->pdev->dev, "alloc_netdev failed for vf:%d\n",
+			vf_num);
+		return -ENOMEM;
+	}
+
+	pf->vf[vf_num].ctrl_netdev = netdev;
+
+	priv = netdev_priv(netdev);
+	priv->vf = &(pf->vf[vf_num]);
+
+	netdev->netdev_ops = &i40e_vf_netdev_ops;
+
+	netif_carrier_off(netdev);
+	netif_tx_disable(netdev);
+
+	err = register_netdev(netdev);
+	if (err) {
+		dev_err(&pf->pdev->dev, "register_netdev failed for vf: %s\n",
+			vf->ctrl_netdev->name);
+		free_netdev(netdev);
+		return err;
+	}
+
+	dev_info(&pf->pdev->dev, "VF representor(%s) created for VF %d\n",
+		 vf->ctrl_netdev->name, vf_num);
+
+	return 0;
+}
+
+/**
+ * i40e_free_vf_netdev
+ * @vf: pointer to the VF structure
+ *
+ * Free VF representor/control netdev
+ **/
+void i40e_free_vf_netdev(struct i40e_vf *vf)
+{
+	struct i40e_pf *pf = vf->pf;
+
+	if (!vf->ctrl_netdev)
+		return;
+
+	dev_info(&pf->pdev->dev, "Freeing VF representor(%s)\n",
+		 vf->ctrl_netdev->name);
+
+	unregister_netdev(vf->ctrl_netdev);
+	free_netdev(vf->ctrl_netdev);
+}
+
 /**
  * i40e_free_vfs
  * @pf: pointer to the PF structure
@@ -1042,6 +1126,9 @@ void i40e_free_vfs(struct i40e_pf *pf)
 			i40e_free_vf_res(&pf->vf[i]);
 		/* disable qp mappings */
 		i40e_disable_vf_mappings(&pf->vf[i]);
+
+		if (pf->eswitch_mode == DEVLINK_ESWITCH_MODE_SWITCHDEV)
+			i40e_free_vf_netdev(&pf->vf[i]);
 	}
 
 	kfree(pf->vf);
@@ -1110,6 +1197,9 @@ int i40e_alloc_vfs(struct i40e_pf *pf, u16 num_alloc_vfs)
 		/* VF resources get allocated during reset */
 		i40e_reset_vf(&vfs[i], false);
 
+		if (pf->eswitch_mode == DEVLINK_ESWITCH_MODE_SWITCHDEV)
+			i40e_alloc_vf_netdev(&vfs[i], i);
+
 	}
 	pf->num_alloc_vfs = num_alloc_vfs;
 
diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h
index 8751741..8446547 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h
@@ -72,10 +72,21 @@ enum i40e_vf_capabilities {
 	I40E_VIRTCHNL_VF_CAP_IWARP,
 };
 
+/* VF Ctrl netdev private structure */
+struct i40e_vf_netdev_priv {
+	struct i40e_vf *vf;
+};
+
 /* VF information structure */
 struct i40e_vf {
 	struct i40e_pf *pf;
 
+	/* VF representor netdev that allows control and configuration of
+	 * VFs from the host. Enables returning VF stats, configuring ntuple
+	 * filters on VFs, adding multiple mac/vlans to VFs etc.
+	 */
+	struct net_device *ctrl_netdev;
+
 	/* VF id in the PF space */
 	s16 vf_id;
 	/* all VF vsis connect to the same parent */
@@ -142,4 +153,7 @@ int i40e_ndo_set_vf_spoofchk(struct net_device *netdev, int vf_id, bool enable);
 void i40e_vc_notify_link_state(struct i40e_pf *pf);
 void i40e_vc_notify_reset(struct i40e_pf *pf);
 
+int i40e_alloc_vf_netdev(struct i40e_vf *vf, u16 vf_num);
+void i40e_free_vf_netdev(struct i40e_vf *vf);
+
 #endif /* _I40E_VIRTCHNL_PF_H_ */
-- 
2.5.5


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [Intel-wired-lan] [next PATCH 3/5] i40e: Enable VF specific ethtool statistics via VF representor netdevs.
  2016-08-04 16:49 [Intel-wired-lan] [next PATCH 1/5] i40e: Introduce devlink interface Sridhar Samudrala
  2016-08-04 16:49 ` [Intel-wired-lan] [next PATCH 2/5] i40e: Introduce VF representor/control netdevs Sridhar Samudrala
@ 2016-08-04 16:49 ` Sridhar Samudrala
  2016-08-10  0:21   ` Nambiar, Amritha
  2016-08-04 16:49 ` [Intel-wired-lan] [next PATCH 4/5] i40e: Refactor flow director filter management to make it per VSI Sridhar Samudrala
                   ` (4 subsequent siblings)
  6 siblings, 1 reply; 18+ messages in thread
From: Sridhar Samudrala @ 2016-08-04 16:49 UTC (permalink / raw)
  To: intel-wired-lan

Sample script that shows ethtool stats on VF representor netdev
PF: enp5s0f0, VF0: enp5s2  VF_REP0: enp5s0f0-vf0

   # devlink dev set pci/0000:05:00.0 smode switchdev
   # echo 2 > /sys/class/net/enp5s0f0/device/sriov_numvfs
   # ip link set enp5s2 up
   # ethtool -S enp5s0f0-vf0
   NIC statistics:
     rx_bytes: 0
     rx_unicast: 0
     rx_multicast: 0
     rx_broadcast: 0
     rx_discards: 0
     rx_unknown_protocol: 0
     tx_bytes: 140
     tx_unicast: 0
     tx_multicast: 2
     tx_broadcast: 0
     tx_discards: 0
     tx_errors: 0

Signed-off-by: Sridhar Samudrala <sridhar.samudrala@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e.h             |  1 +
 drivers/net/ethernet/intel/i40e/i40e_ethtool.c     | 67 ++++++++++++++++++++++
 drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c |  1 +
 3 files changed, 69 insertions(+)

diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h
index 724bd82..2d35c0d 100644
--- a/drivers/net/ethernet/intel/i40e/i40e.h
+++ b/drivers/net/ethernet/intel/i40e/i40e.h
@@ -875,4 +875,5 @@ i40e_status i40e_get_npar_bw_setting(struct i40e_pf *pf);
 i40e_status i40e_set_npar_bw_setting(struct i40e_pf *pf);
 i40e_status i40e_commit_npar_bw_setting(struct i40e_pf *pf);
 void i40e_print_link_message(struct i40e_vsi *vsi, bool isup);
+void i40e_set_vf_netdev_ethtool_ops(struct net_device *netdev);
 #endif /* _I40E_H_ */
diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
index c912e04..0c3bffa 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
@@ -3040,3 +3040,70 @@ void i40e_set_ethtool_ops(struct net_device *netdev)
 {
 	netdev->ethtool_ops = &i40e_ethtool_ops;
 }
+
+static const char i40e_vf_netdev_ethtool_sset[][ETH_GSTRING_LEN] = {
+	"rx_bytes",
+	"rx_unicast",
+	"rx_multicast",
+	"rx_broadcast",
+	"rx_discards",
+	"rx_unknown_protocol",
+	"tx_bytes",
+	"tx_unicast",
+	"tx_multicast",
+	"tx_broadcast",
+	"tx_discards",
+	"tx_errors",
+};
+
+#define I40E_VF_NETDEV_ETHTOOL_STAT_COUNT \
+			ARRAY_SIZE(i40e_vf_netdev_ethtool_sset)
+
+static void i40e_vf_netdev_ethtool_get_strings(struct net_device *dev,
+					       u32 stringset,
+					       u8 *ethtool_strings)
+{
+	switch (stringset) {
+	case ETH_SS_STATS:
+		memcpy(ethtool_strings, &i40e_vf_netdev_ethtool_sset,
+		       sizeof(i40e_vf_netdev_ethtool_sset));
+		break;
+	}
+}
+
+static int i40e_vf_netdev_ethtool_get_sset_count(struct net_device *dev,
+						 int stringset)
+{
+	switch (stringset) {
+	case ETH_SS_STATS:
+		return I40E_VF_NETDEV_ETHTOOL_STAT_COUNT;
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static void i40e_vf_netdev_ethtool_get_stats(struct net_device *dev,
+				struct ethtool_stats *target_ethtool_stats,
+				u64 *target_stat_values)
+{
+	struct i40e_vf_netdev_priv *priv = netdev_priv(dev);
+	struct i40e_vf *vf = priv->vf;
+	struct i40e_pf *pf = vf->pf;
+	struct i40e_vsi *vsi;
+
+	vsi = pf->vsi[vf->lan_vsi_idx];
+	i40e_update_stats(vsi);
+	memcpy(target_stat_values, &vsi->eth_stats,
+	       I40E_VF_NETDEV_ETHTOOL_STAT_COUNT * 8);
+}
+
+static const struct ethtool_ops i40e_vf_netdev_ethtool_ops = {
+	.get_strings		= i40e_vf_netdev_ethtool_get_strings,
+	.get_ethtool_stats      = i40e_vf_netdev_ethtool_get_stats,
+	.get_sset_count         = i40e_vf_netdev_ethtool_get_sset_count,
+};
+
+void i40e_set_vf_netdev_ethtool_ops(struct net_device *netdev)
+{
+	netdev->ethtool_ops = &i40e_vf_netdev_ethtool_ops;
+}
diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
index d41c37c..313a53c 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
@@ -1046,6 +1046,7 @@ int i40e_alloc_vf_netdev(struct i40e_vf *vf, u16 vf_num)
 	priv->vf = &(pf->vf[vf_num]);
 
 	netdev->netdev_ops = &i40e_vf_netdev_ops;
+	i40e_set_vf_netdev_ethtool_ops(netdev);
 
 	netif_carrier_off(netdev);
 	netif_tx_disable(netdev);
-- 
2.5.5


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [Intel-wired-lan] [next PATCH 4/5] i40e: Refactor flow director filter management to make it per VSI.
  2016-08-04 16:49 [Intel-wired-lan] [next PATCH 1/5] i40e: Introduce devlink interface Sridhar Samudrala
  2016-08-04 16:49 ` [Intel-wired-lan] [next PATCH 2/5] i40e: Introduce VF representor/control netdevs Sridhar Samudrala
  2016-08-04 16:49 ` [Intel-wired-lan] [next PATCH 3/5] i40e: Enable VF specific ethtool statistics via VF representor netdevs Sridhar Samudrala
@ 2016-08-04 16:49 ` Sridhar Samudrala
  2016-08-04 16:49 ` [Intel-wired-lan] [next PATCH 5/5] i40e: Enable ntuple filters on VFs via VF representor netdevs Sridhar Samudrala
                   ` (3 subsequent siblings)
  6 siblings, 0 replies; 18+ messages in thread
From: Sridhar Samudrala @ 2016-08-04 16:49 UTC (permalink / raw)
  To: intel-wired-lan

Three fields in struct i40e_pf - fd_filter_list, fdir_pf_active_filters and
fd_inv are moved to struct i40e_vsi.

This enables the later patch to support ntuple filters on a VF using
VF representor netdev.

Signed-off-by: Sridhar Samudrala <sridhar.samudrala@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e.h         |   7 +-
 drivers/net/ethernet/intel/i40e/i40e_ethtool.c |  31 +++---
 drivers/net/ethernet/intel/i40e/i40e_main.c    | 147 +++++++++++++++++++------
 drivers/net/ethernet/intel/i40e/i40e_txrx.c    |   5 +-
 4 files changed, 138 insertions(+), 52 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h
index 2d35c0d..a0344e9 100644
--- a/drivers/net/ethernet/intel/i40e/i40e.h
+++ b/drivers/net/ethernet/intel/i40e/i40e.h
@@ -296,8 +296,6 @@ struct i40e_pf {
 	u8 atr_sample_rate;
 	bool wol_en;
 
-	struct hlist_head fdir_filter_list;
-	u16 fdir_pf_active_filters;
 	unsigned long fd_flush_timestamp;
 	u32 fd_flush_cnt;
 	u32 fd_add_err;
@@ -446,7 +444,6 @@ struct i40e_pf {
 	u32 npar_min_bw;
 
 	u32 ioremap_len;
-	u32 fd_inv;
 	u16 phy_led_val;
 	enum devlink_eswitch_mode eswitch_mode;
 };
@@ -529,6 +526,10 @@ struct i40e_vsi {
 	u32 rx_buf_failed;
 	u32 rx_page_failed;
 
+	struct hlist_head fdir_filter_list;
+	u16 fdir_active_filters;
+	u32 fd_inv;
+
 	/* These are containers of ring pointers, allocated at run-time */
 	struct i40e_ring **rx_rings;
 	struct i40e_ring **tx_rings;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
index 0c3bffa..9a5c807 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
@@ -2181,7 +2181,7 @@ static int i40e_get_rss_hash_opts(struct i40e_pf *pf, struct ethtool_rxnfc *cmd)
 
 /**
  * i40e_get_ethtool_fdir_all - Populates the rule count of a command
- * @pf: Pointer to the physical function struct
+ * @vsi: Pointer to the VSI struct
  * @cmd: The command to get or set Rx flow classification rules
  * @rule_locs: Array of used rule locations
  *
@@ -2190,19 +2190,20 @@ static int i40e_get_rss_hash_opts(struct i40e_pf *pf, struct ethtool_rxnfc *cmd)
  *
  * Returns 0 on success or -EMSGSIZE if entry not found
  **/
-static int i40e_get_ethtool_fdir_all(struct i40e_pf *pf,
+static int i40e_get_ethtool_fdir_all(struct i40e_vsi *vsi,
 				     struct ethtool_rxnfc *cmd,
 				     u32 *rule_locs)
 {
 	struct i40e_fdir_filter *rule;
 	struct hlist_node *node2;
+	struct i40e_pf *pf = vsi->back;
 	int cnt = 0;
 
 	/* report total rule count */
 	cmd->data = i40e_get_fd_cnt_all(pf);
 
 	hlist_for_each_entry_safe(rule, node2,
-				  &pf->fdir_filter_list, fdir_node) {
+				  &vsi->fdir_filter_list, fdir_node) {
 		if (cnt == cmd->rule_cnt)
 			return -EMSGSIZE;
 
@@ -2217,7 +2218,7 @@ static int i40e_get_ethtool_fdir_all(struct i40e_pf *pf,
 
 /**
  * i40e_get_ethtool_fdir_entry - Look up a filter based on Rx flow
- * @pf: Pointer to the physical function struct
+ * @vsi: Pointer to the VSI struct
  * @cmd: The command to get or set Rx flow classification rules
  *
  * This function looks up a filter based on the Rx flow classification
@@ -2225,16 +2226,17 @@ static int i40e_get_ethtool_fdir_all(struct i40e_pf *pf,
  *
  * Returns 0 on success or -EINVAL if filter not found
  **/
-static int i40e_get_ethtool_fdir_entry(struct i40e_pf *pf,
+static int i40e_get_ethtool_fdir_entry(struct i40e_vsi *vsi,
 				       struct ethtool_rxnfc *cmd)
 {
 	struct ethtool_rx_flow_spec *fsp =
 			(struct ethtool_rx_flow_spec *)&cmd->fs;
 	struct i40e_fdir_filter *rule = NULL;
 	struct hlist_node *node2;
+	struct i40e_pf *pf = vsi->back;
 
 	hlist_for_each_entry_safe(rule, node2,
-				  &pf->fdir_filter_list, fdir_node) {
+				  &vsi->fdir_filter_list, fdir_node) {
 		if (fsp->location <= rule->fd_id)
 			break;
 	}
@@ -2262,7 +2264,7 @@ static int i40e_get_ethtool_fdir_entry(struct i40e_pf *pf,
 	else
 		fsp->ring_cookie = rule->q_index;
 
-	if (rule->dest_vsi != pf->vsi[pf->lan_vsi]->id) {
+	if (rule->dest_vsi != vsi->id) {
 		struct i40e_vsi *vsi;
 
 		vsi = i40e_find_vsi_from_id(pf, rule->dest_vsi);
@@ -2299,16 +2301,16 @@ static int i40e_get_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd,
 		ret = i40e_get_rss_hash_opts(pf, cmd);
 		break;
 	case ETHTOOL_GRXCLSRLCNT:
-		cmd->rule_cnt = pf->fdir_pf_active_filters;
+		cmd->rule_cnt = vsi->fdir_active_filters;
 		/* report total rule count */
 		cmd->data = i40e_get_fd_cnt_all(pf);
 		ret = 0;
 		break;
 	case ETHTOOL_GRXCLSRULE:
-		ret = i40e_get_ethtool_fdir_entry(pf, cmd);
+		ret = i40e_get_ethtool_fdir_entry(vsi, cmd);
 		break;
 	case ETHTOOL_GRXCLSRLALL:
-		ret = i40e_get_ethtool_fdir_all(pf, cmd, rule_locs);
+		ret = i40e_get_ethtool_fdir_all(vsi, cmd, rule_locs);
 		break;
 	default:
 		break;
@@ -2483,7 +2485,6 @@ static int i40e_update_ethtool_fdir_entry(struct i40e_vsi *vsi,
 					  struct ethtool_rxnfc *cmd)
 {
 	struct i40e_fdir_filter *rule, *parent;
-	struct i40e_pf *pf = vsi->back;
 	struct hlist_node *node2;
 	int err = -EINVAL;
 
@@ -2491,7 +2492,7 @@ static int i40e_update_ethtool_fdir_entry(struct i40e_vsi *vsi,
 	rule = NULL;
 
 	hlist_for_each_entry_safe(rule, node2,
-				  &pf->fdir_filter_list, fdir_node) {
+				  &vsi->fdir_filter_list, fdir_node) {
 		/* hash found, or no matching entry */
 		if (rule->fd_id >= sw_idx)
 			break;
@@ -2506,7 +2507,7 @@ static int i40e_update_ethtool_fdir_entry(struct i40e_vsi *vsi,
 			err = i40e_add_del_fdir(vsi, rule, false);
 		hlist_del(&rule->fdir_node);
 		kfree(rule);
-		pf->fdir_pf_active_filters--;
+		vsi->fdir_active_filters--;
 	}
 
 	/* If no input this was a delete, err should be 0 if a rule was
@@ -2523,10 +2524,10 @@ static int i40e_update_ethtool_fdir_entry(struct i40e_vsi *vsi,
 		hlist_add_behind(&input->fdir_node, &parent->fdir_node);
 	else
 		hlist_add_head(&input->fdir_node,
-			       &pf->fdir_filter_list);
+			       &vsi->fdir_filter_list);
 
 	/* update counts */
-	pf->fdir_pf_active_filters++;
+	vsi->fdir_active_filters++;
 
 	return 0;
 }
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 208da9f..579ea8a 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -3091,25 +3091,46 @@ static void i40e_set_vsi_rx_mode(struct i40e_vsi *vsi)
 }
 
 /**
- * i40e_fdir_filter_restore - Restore the Sideband Flow Director filters
- * @vsi: Pointer to the targeted VSI
+ * i40e_vsi_fdir_filter_restore - Restore the Sideband Flow Director filters
+ * @vsi: Pointer to VSI
  *
  * This function replays the hlist on the hw where all the SB Flow Director
- * filters were saved.
+ * filters were saved for a VSI.
  **/
-static void i40e_fdir_filter_restore(struct i40e_vsi *vsi)
+
+static void i40e_vsi_fdir_filter_restore(struct i40e_vsi *vsi)
 {
 	struct i40e_fdir_filter *filter;
-	struct i40e_pf *pf = vsi->back;
 	struct hlist_node *node;
 
+	hlist_for_each_entry_safe(filter, node,
+				  &vsi->fdir_filter_list, fdir_node) {
+		i40e_add_del_fdir(vsi, filter, true);
+	}
+}
+
+
+/**
+ * i40e_fdir_filter_restore - Restore the Sideband Flow Director filters
+ * @pf: Pointer to PF
+ *
+ * This function calls i40_vsi_fdi_filter_restore() for all the VSIs
+ * associated with the PF that have flow director filters.
+ **/
+static void i40e_fdir_filter_restore(struct i40e_pf *pf)
+{
+	struct i40e_vf *vf;
+	int i;
+
 	if (!(pf->flags & I40E_FLAG_FD_SB_ENABLED))
 		return;
 
-	hlist_for_each_entry_safe(filter, node,
-				  &pf->fdir_filter_list, fdir_node) {
-		i40e_add_del_fdir(vsi, filter, true);
+	for (i = 0; i < pf->num_alloc_vfs; i++) {
+		vf = &(pf->vf[i]);
+		i40e_vsi_fdir_filter_restore(pf->vsi[vf->lan_vsi_idx]);
 	}
+
+	i40e_vsi_fdir_filter_restore(pf->vsi[pf->lan_vsi]);
 }
 
 /**
@@ -5232,7 +5253,7 @@ static int i40e_up_complete(struct i40e_vsi *vsi)
 				dev_info(&pf->pdev->dev, "Forcing ATR off, sideband rules for TCP/IPv4 exist\n");
 			pf->fd_tcp_rule = 0;
 		}
-		i40e_fdir_filter_restore(vsi);
+		i40e_fdir_filter_restore(pf);
 	}
 
 	/* On the next run of the service_task, notify any clients of the new
@@ -5498,23 +5519,43 @@ err_setup_tx:
 }
 
 /**
- * i40e_fdir_filter_exit - Cleans up the Flow Director accounting
- * @pf: Pointer to PF
+ * i40e_vsi_fdir_filter_exit - Cleans up the Flow Director accounting
+ * @vsi: Pointer to VSI
  *
  * This function destroys the hlist where all the Flow Director
- * filters were saved.
+ * filters were saved for a VSI.
  **/
-static void i40e_fdir_filter_exit(struct i40e_pf *pf)
+static void i40e_vsi_fdir_filter_exit(struct i40e_vsi *vsi)
 {
 	struct i40e_fdir_filter *filter;
-	struct hlist_node *node2;
+	struct hlist_node *node;
 
-	hlist_for_each_entry_safe(filter, node2,
-				  &pf->fdir_filter_list, fdir_node) {
+	hlist_for_each_entry_safe(filter, node, &vsi->fdir_filter_list,
+				  fdir_node) {
 		hlist_del(&filter->fdir_node);
 		kfree(filter);
 	}
-	pf->fdir_pf_active_filters = 0;
+	vsi->fdir_active_filters = 0;
+}
+
+/**
+ * i40e_fdir_filter_exit - Cleans up the Flow Director accounting
+ * @pf: Pointer to PF
+ *
+ * This function destroys the hlist's where all the Flow Director
+ * filters were saved.
+ **/
+static void i40e_fdir_filter_exit(struct i40e_pf *pf)
+{
+	struct i40e_vf *vf;
+	int i;
+
+	for (i = 0; i < pf->num_alloc_vfs; i++) {
+		vf = &(pf->vf[i]);
+		i40e_vsi_fdir_filter_exit(pf->vsi[vf->lan_vsi_idx]);
+	}
+
+	i40e_vsi_fdir_filter_exit(pf->vsi[pf->lan_vsi]);
 }
 
 /**
@@ -5898,14 +5939,37 @@ u32 i40e_get_global_fd_count(struct i40e_pf *pf)
 }
 
 /**
+ * i40e_vsi_fd_inv_clean - Function to reenabe FD ATR or SB if disabled
+ * @vsi: Pointer to VSI
+ **/
+static void i40e_vsi_fd_inv_clean(struct i40e_vsi *vsi)
+{
+	struct i40e_fdir_filter *filter;
+	struct hlist_node *node;
+
+	if (!vsi->fd_inv)
+		return;
+
+	hlist_for_each_entry_safe(filter, node, &vsi->fdir_filter_list,
+				  fdir_node) {
+		if (filter->fd_id == vsi->fd_inv) {
+			hlist_del(&filter->fdir_node);
+			kfree(filter);
+			vsi->fdir_active_filters--;
+		}
+	}
+}
+
+
+/**
  * i40e_fdir_check_and_reenable - Function to reenabe FD ATR or SB if disabled
  * @pf: board private structure
  **/
 void i40e_fdir_check_and_reenable(struct i40e_pf *pf)
 {
-	struct i40e_fdir_filter *filter;
+	struct i40e_vf *vf;
 	u32 fcnt_prog, fcnt_avail;
-	struct hlist_node *node;
+	int i;
 
 	if (test_bit(__I40E_FD_FLUSH_REQUESTED, &pf->state))
 		return;
@@ -5936,16 +6000,35 @@ void i40e_fdir_check_and_reenable(struct i40e_pf *pf)
 	}
 
 	/* if hw had a problem adding a filter, delete it */
-	if (pf->fd_inv > 0) {
-		hlist_for_each_entry_safe(filter, node,
-					  &pf->fdir_filter_list, fdir_node) {
-			if (filter->fd_id == pf->fd_inv) {
-				hlist_del(&filter->fdir_node);
-				kfree(filter);
-				pf->fdir_pf_active_filters--;
-			}
-		}
+
+	for (i = 0; i < pf->num_alloc_vfs; i++) {
+		vf = &(pf->vf[i]);
+		i40e_vsi_fd_inv_clean(pf->vsi[vf->lan_vsi_idx]);
+	}
+	i40e_vsi_fd_inv_clean(pf->vsi[pf->lan_vsi]);
+}
+
+/**
+ * i40e_pf_get_fdir_active_filters - Returns the numbe of active filters
+ * associated with a PF
+ *
+ * @pf: Pointer to PF
+ *
+ **/
+static u16 i40e_pf_get_fdir_active_filters(struct i40e_pf *pf)
+{
+	struct i40e_vf *vf;
+	u16 cnt = 0;
+	int i;
+
+	for (i = 0; i < pf->num_alloc_vfs; i++) {
+		vf = &(pf->vf[i]);
+		cnt += pf->vsi[vf->lan_vsi_idx]->fdir_active_filters;
 	}
+
+	cnt += pf->vsi[pf->lan_vsi]->fdir_active_filters;
+
+	return cnt;
 }
 
 #define I40E_MIN_FD_FLUSH_INTERVAL 10
@@ -5974,7 +6057,7 @@ static void i40e_fdir_flush_and_replay(struct i40e_pf *pf)
 	 */
 	min_flush_time = pf->fd_flush_timestamp +
 			 (I40E_MIN_FD_FLUSH_SB_ATR_UNSTABLE * HZ);
-	fd_room = pf->fdir_pf_filter_count - pf->fdir_pf_active_filters;
+	fd_room = pf->fdir_pf_filter_count - i40e_pf_get_fdir_active_filters(pf);
 
 	if (!(time_after(jiffies, min_flush_time)) &&
 	    (fd_room < I40E_FDIR_BUFFER_HEAD_ROOM_FOR_ATR)) {
@@ -6002,7 +6085,7 @@ static void i40e_fdir_flush_and_replay(struct i40e_pf *pf)
 		dev_warn(&pf->pdev->dev, "FD table did not flush, needs more time\n");
 	} else {
 		/* replay sideband filters */
-		i40e_fdir_filter_restore(pf->vsi[pf->lan_vsi]);
+		i40e_fdir_filter_restore(pf);
 		if (!disable_atr)
 			pf->flags |= I40E_FLAG_FD_ATR_ENABLED;
 		clear_bit(__I40E_FD_FLUSH_REQUESTED, &pf->state);
@@ -6017,7 +6100,7 @@ static void i40e_fdir_flush_and_replay(struct i40e_pf *pf)
  **/
 u32 i40e_get_current_atr_cnt(struct i40e_pf *pf)
 {
-	return i40e_get_current_fd_count(pf) - pf->fdir_pf_active_filters;
+	return i40e_get_current_fd_count(pf) - i40e_pf_get_fdir_active_filters(pf);
 }
 
 /* We can see up to 256 filter programming desc in transit if the filters are
@@ -6732,6 +6815,7 @@ static void i40e_fdir_teardown(struct i40e_pf *pf)
 	int i;
 
 	i40e_fdir_filter_exit(pf);
+
 	for (i = 0; i < pf->num_alloc_vsi; i++) {
 		if (pf->vsi[i] && pf->vsi[i]->type == I40E_VSI_FDIR) {
 			i40e_vsi_release(pf->vsi[i]);
@@ -8665,7 +8749,6 @@ bool i40e_set_ntuple(struct i40e_pf *pf, netdev_features_t features)
 		pf->auto_disable_flags &= ~I40E_FLAG_FD_SB_ENABLED;
 		/* reset fd counters */
 		pf->fd_add_err = pf->fd_atr_cnt = pf->fd_tcp_rule = 0;
-		pf->fdir_pf_active_filters = 0;
 		pf->flags |= I40E_FLAG_FD_ATR_ENABLED;
 		if (I40E_DEBUG_FD & pf->hw.debug_mask)
 			dev_info(&pf->pdev->dev, "ATR re-enabled.\n");
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index df7ecc9..b2f6ca6 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -463,6 +463,7 @@ int i40e_add_del_fdir(struct i40e_vsi *vsi,
 static void i40e_fd_handle_status(struct i40e_ring *rx_ring,
 				  union i40e_rx_desc *rx_desc, u8 prog_id)
 {
+	struct i40e_vsi *vsi = rx_ring->vsi;
 	struct i40e_pf *pf = rx_ring->vsi->back;
 	struct pci_dev *pdev = pf->pdev;
 	u32 fcnt_prog, fcnt_avail;
@@ -474,11 +475,11 @@ static void i40e_fd_handle_status(struct i40e_ring *rx_ring,
 		I40E_RX_PROG_STATUS_DESC_QW1_ERROR_SHIFT;
 
 	if (error == BIT(I40E_RX_PROG_STATUS_DESC_FD_TBL_FULL_SHIFT)) {
-		pf->fd_inv = le32_to_cpu(rx_desc->wb.qword0.hi_dword.fd_id);
+		vsi->fd_inv = le32_to_cpu(rx_desc->wb.qword0.hi_dword.fd_id);
 		if ((rx_desc->wb.qword0.hi_dword.fd_id != 0) ||
 		    (I40E_DEBUG_FD & pf->hw.debug_mask))
 			dev_warn(&pdev->dev, "ntuple filter loc = %d, could not be added\n",
-				 pf->fd_inv);
+				 vsi->fd_inv);
 
 		/* Check if the programming error is for ATR.
 		 * If so, auto disable ATR and set a state for
-- 
2.5.5


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [Intel-wired-lan] [next PATCH 5/5] i40e: Enable ntuple filters on VFs via VF representor netdevs.
  2016-08-04 16:49 [Intel-wired-lan] [next PATCH 1/5] i40e: Introduce devlink interface Sridhar Samudrala
                   ` (2 preceding siblings ...)
  2016-08-04 16:49 ` [Intel-wired-lan] [next PATCH 4/5] i40e: Refactor flow director filter management to make it per VSI Sridhar Samudrala
@ 2016-08-04 16:49 ` Sridhar Samudrala
  2016-08-10  0:12 ` [Intel-wired-lan] [next PATCH 1/5] i40e: Introduce devlink interface Nambiar, Amritha
                   ` (2 subsequent siblings)
  6 siblings, 0 replies; 18+ messages in thread
From: Sridhar Samudrala @ 2016-08-04 16:49 UTC (permalink / raw)
  To: intel-wired-lan

Sample script that shows how ntuple filters can be added to VFs.
PF: enp5s0f0, VF_REP0: enp5s0f0-vf0, VF_REP1: enp5s0f0-vf1

   # devlink dev set pci/0000:05:00.0 smode switchdev
   # echo 2 > /sys/class/net/enp5s0f0/device/sriov_numvfs
   # ethtool -N enp5s0f0 flow-type ip4 src-ip 192.168.1.1 \
       dst-ip 192.168.1.2 action -1
   # ethtool -N enp5s0f0-vf0 flow-type ip4 src-ip 192.168.10.1 \
       dst-ip 192.168.10.2 action -1
   # ethtool -N enp5s0f0-vf1 flow-type ip4 src-ip 192.168.11.1 \
       dst-ip 192.168.11.2 action -1
   # ethtool -n enp5s0f0
     72 RX rings available
     Total 1 rules

     Filter: 7679
         Rule Type: Raw IPv4
         Src IP addr: 192.168.1.1 mask: 255.255.255.255
         Dest IP addr: 192.168.1.2 mask: 255.255.255.255
         TOS: 0x0 mask: 0xff
         Protocol: 0 mask: 0xff
         L4 bytes: 0x0 mask: 0xffffffff
         Action: Drop
   # ethtool -n enp5s0f0-vf0
     4 RX rings available
     Total 1 rules

     Filter: 7679
         Rule Type: Raw IPv4
         Src IP addr: 192.168.10.1 mask: 255.255.255.255
         Dest IP addr: 192.168.10.2 mask: 255.255.255.255
         TOS: 0x0 mask: 0xff
         Protocol: 0 mask: 0xff
         L4 bytes: 0x0 mask: 0xffffffff
         Action: Drop
   # ethtool -n enp5s0f0-vf1
     4 RX rings available
     Total 1 rules

     Filter: 7679
         Rule Type: Raw IPv4
         Src IP addr: 192.168.11.1 mask: 255.255.255.255
         Dest IP addr: 192.168.11.2 mask: 255.255.255.255
         TOS: 0x0 mask: 0xff
         Protocol: 0 mask: 0xff
         L4 bytes: 0x0 mask: 0xffffffff
         Action: Drop

Signed-off-by: Sridhar Samudrala <sridhar.samudrala@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e_ethtool.c | 72 ++++++++++++++++++++++++++
 1 file changed, 72 insertions(+)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
index 9a5c807..d159a11 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
@@ -2320,6 +2320,46 @@ static int i40e_get_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd,
 }
 
 /**
+ * i40e_vf_netdev_get_rxnfc - command to get VF's RX flow classification rules
+ * @netdev: vf representor network interface device structure
+ * @cmd: ethtool rxnfc command
+ *
+ * Returns Success if the command is supported.
+ **/
+static int i40e_vf_netdev_get_rxnfc(struct net_device *netdev,
+				    struct ethtool_rxnfc *cmd, u32 *rule_locs)
+{
+	struct i40e_vf_netdev_priv *priv = netdev_priv(netdev);
+	struct i40e_vf *vf = priv->vf;
+	struct i40e_pf *pf = vf->pf;
+	struct i40e_vsi *vsi = pf->vsi[vf->lan_vsi_idx];
+	int ret = -EOPNOTSUPP;
+
+	switch (cmd->cmd) {
+	case ETHTOOL_GRXRINGS:
+		cmd->data = vsi->num_queue_pairs;
+		ret = 0;
+		break;
+	case ETHTOOL_GRXCLSRLCNT:
+		cmd->rule_cnt = vsi->fdir_active_filters;
+		/* report total rule count */
+		cmd->data = i40e_get_fd_cnt_all(pf);
+		ret = 0;
+		break;
+	case ETHTOOL_GRXCLSRULE:
+		ret = i40e_get_ethtool_fdir_entry(vsi, cmd);
+		break;
+	case ETHTOOL_GRXCLSRLALL:
+		ret = i40e_get_ethtool_fdir_all(vsi, cmd, rule_locs);
+		break;
+	default:
+		break;
+	}
+
+	return ret;
+}
+
+/**
  * i40e_set_rss_hash_opt - Enable/Disable flow types for RSS hash
  * @pf: pointer to the physical function struct
  * @cmd: ethtool rxnfc command
@@ -2697,6 +2737,36 @@ static int i40e_set_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd)
 }
 
 /**
+ * i40e_vf_netdev_set_rxnfc - command to set VF's RX flow classification rules
+ * @netdev: VF representor's network interface device structure
+ * @cmd: ethtool rxnfc command
+ *
+ * Returns Success if the command is supported.
+ **/
+static int i40e_vf_netdev_set_rxnfc(struct net_device *netdev,
+				    struct ethtool_rxnfc *cmd)
+{
+	struct i40e_vf_netdev_priv *priv = netdev_priv(netdev);
+	struct i40e_vf *vf = priv->vf;
+	struct i40e_pf *pf = vf->pf;
+	struct i40e_vsi *vsi = pf->vsi[vf->lan_vsi_idx];
+	int ret = -EOPNOTSUPP;
+
+	switch (cmd->cmd) {
+	case ETHTOOL_SRXCLSRLINS:
+		ret = i40e_add_fdir_ethtool(vsi, cmd);
+		break;
+	case ETHTOOL_SRXCLSRLDEL:
+		ret = i40e_del_fdir_entry(vsi, cmd);
+		break;
+	default:
+		break;
+	}
+
+	return ret;
+}
+
+/**
  * i40e_max_channels - get Max number of combined channels supported
  * @vsi: vsi pointer
  **/
@@ -3102,6 +3172,8 @@ static const struct ethtool_ops i40e_vf_netdev_ethtool_ops = {
 	.get_strings		= i40e_vf_netdev_ethtool_get_strings,
 	.get_ethtool_stats      = i40e_vf_netdev_ethtool_get_stats,
 	.get_sset_count         = i40e_vf_netdev_ethtool_get_sset_count,
+	.get_rxnfc		= i40e_vf_netdev_get_rxnfc,
+	.set_rxnfc		= i40e_vf_netdev_set_rxnfc,
 };
 
 void i40e_set_vf_netdev_ethtool_ops(struct net_device *netdev)
-- 
2.5.5


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [Intel-wired-lan] [next PATCH 1/5] i40e: Introduce devlink interface.
  2016-08-04 16:49 [Intel-wired-lan] [next PATCH 1/5] i40e: Introduce devlink interface Sridhar Samudrala
                   ` (3 preceding siblings ...)
  2016-08-04 16:49 ` [Intel-wired-lan] [next PATCH 5/5] i40e: Enable ntuple filters on VFs via VF representor netdevs Sridhar Samudrala
@ 2016-08-10  0:12 ` Nambiar, Amritha
  2016-08-10 16:01 ` Alexander Duyck
  2016-08-10 20:50 ` Jeff Kirsher
  6 siblings, 0 replies; 18+ messages in thread
From: Nambiar, Amritha @ 2016-08-10  0:12 UTC (permalink / raw)
  To: intel-wired-lan


On 8/4/2016 9:49 AM, Sridhar Samudrala wrote:
> Add initial devlink support to set/get the mode of SRIOV switch.
> This patch allows the mode to be set to either 'legacy' or 'switchdev', but
> doesn't implement any functionality to create vf representors in switchdev
> mode.
>
> With smode support in iproute2 'devlink' utility, switch mode can be set
> and get via following commands.
>
>      # devlink dev smode pci/0000:05:00.0
>      mode: legacy
>      # devlink dev set pci/0000:05:00.0 smode switchdev
>      # devlink dev smode pci/0000:05:00.0
>      mode: switchdev
>
> Signed-off-by: Sridhar Samudrala <sridhar.samudrala@intel.com>
   Acked-by: Amritha Nambiar <amritha.nambiar@intel.com>
> ---
>   drivers/net/ethernet/intel/Kconfig          |  1 +
>   drivers/net/ethernet/intel/i40e/i40e.h      |  3 ++
>   drivers/net/ethernet/intel/i40e/i40e_main.c | 84 ++++++++++++++++++++++++++---
>   3 files changed, 80 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/net/ethernet/intel/Kconfig b/drivers/net/ethernet/intel/Kconfig
> index c0e1743..2ede229 100644
> --- a/drivers/net/ethernet/intel/Kconfig
> +++ b/drivers/net/ethernet/intel/Kconfig
> @@ -215,6 +215,7 @@ config I40E
>   	tristate "Intel(R) Ethernet Controller XL710 Family support"
>   	select PTP_1588_CLOCK
>   	depends on PCI
> +	depends on MAY_USE_DEVLINK
>   	---help---
>   	  This driver supports Intel(R) Ethernet Controller XL710 Family of
>   	  devices.  For more information on how to identify your adapter, go
> diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h
> index 2a88291..724bd82 100644
> --- a/drivers/net/ethernet/intel/i40e/i40e.h
> +++ b/drivers/net/ethernet/intel/i40e/i40e.h
> @@ -53,6 +53,8 @@
>   #include <linux/clocksource.h>
>   #include <linux/net_tstamp.h>
>   #include <linux/ptp_clock_kernel.h>
> +#include <net/devlink.h>
> +
>   #include "i40e_type.h"
>   #include "i40e_prototype.h"
>   #ifdef I40E_FCOE
> @@ -446,6 +448,7 @@ struct i40e_pf {
>   	u32 ioremap_len;
>   	u32 fd_inv;
>   	u16 phy_led_val;
> +	enum devlink_eswitch_mode eswitch_mode;
>   };
>   
>   enum i40e_filter_state {
> diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
> index 339d99b..bb44f09 100644
> --- a/drivers/net/ethernet/intel/i40e/i40e_main.c
> +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
> @@ -27,6 +27,7 @@
>   #include <linux/etherdevice.h>
>   #include <linux/of_net.h>
>   #include <linux/pci.h>
> +#include <net/devlink.h>
>   
>   /* Local includes */
>   #include "i40e.h"
> @@ -10671,6 +10672,58 @@ static void i40e_get_platform_mac_addr(struct pci_dev *pdev, struct i40e_pf *pf)
>   }
>   
>   /**
> + * i40e_devlink_eswitch_mode_get
> + *
> + * @devlink: pointer to devlink struct
> + * @mode: sr-iov switch mode pointer
> + *
> + * Returns the switch mode of the associated PF in the @mode pointer.
> + */
> +static int i40e_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode)
> +{
> +	struct i40e_pf *pf = devlink_priv(devlink);
> +
> +	*mode = pf->eswitch_mode;
> +
> +	return 0;
> +}
> +
> +/**
> + * i40e_devlink_eswitch_mode_set
> + *
> + * @devlink: pointer to devlink struct
> + * @mode: sr-iov switch mode
> + *
> + * Set the switch mode of the associated PF.
> + * Returns 0 on success and -EOPNOTSUPP on error.
> + */
> +static int i40e_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode)
> +{
> +	struct i40e_pf *pf = devlink_priv(devlink);
> +	int err = 0;
> +
> +	if (mode == pf->eswitch_mode)
> +		goto done;
> +
> +	switch (mode) {
> +	case DEVLINK_ESWITCH_MODE_LEGACY:
> +	case DEVLINK_ESWITCH_MODE_SWITCHDEV:
> +		pf->eswitch_mode = mode;
> +		break;
> +	default:
> +		err = -EOPNOTSUPP;
> +	}
> +
> +done:
> +	return err;
> +}
> +
> +static const struct devlink_ops i40e_devlink_ops = {
> +	.eswitch_mode_get = i40e_devlink_eswitch_mode_get,
> +	.eswitch_mode_set = i40e_devlink_eswitch_mode_set,
> +};
> +
> +/**
>    * i40e_probe - Device initialization routine
>    * @pdev: PCI device information struct
>    * @ent: entry in i40e_pci_tbl
> @@ -10687,6 +10740,7 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
>   	struct i40e_pf *pf;
>   	struct i40e_hw *hw;
>   	static u16 pfs_found;
> +	struct devlink *devlink;
>   	u16 wol_nvm_bits;
>   	u16 link_status;
>   	int err;
> @@ -10721,16 +10775,21 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
>   	pci_enable_pcie_error_reporting(pdev);
>   	pci_set_master(pdev);
>   
> +	devlink = devlink_alloc(&i40e_devlink_ops, sizeof(*pf));
> +	if (!devlink) {
> +		dev_err(&pdev->dev,
> +			"devlink_alloc failed\n");
> +		err = -ENOMEM;
> +		goto err_devlink_alloc;
> +	}
> +
>   	/* Now that we have a PCI connection, we need to do the
>   	 * low level device setup.  This is primarily setting up
>   	 * the Admin Queue structures and then querying for the
>   	 * device's current profile information.
>   	 */
> -	pf = kzalloc(sizeof(*pf), GFP_KERNEL);
> -	if (!pf) {
> -		err = -ENOMEM;
> -		goto err_pf_alloc;
> -	}
> +	pf = devlink_priv(devlink);
> +
>   	pf->next_vsi = 0;
>   	pf->pdev = pdev;
>   	set_bit(__I40E_DOWN, &pf->state);
> @@ -11047,6 +11106,11 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
>   		}
>   	}
>   
> +	pf->eswitch_mode = DEVLINK_ESWITCH_MODE_LEGACY;
> +	err = devlink_register(devlink, &pdev->dev);
> +	if (err)
> +		goto err_devlink_register;
> +
>   #ifdef CONFIG_PCI_IOV
>   	/* prep for VF support */
>   	if ((pf->flags & I40E_FLAG_SRIOV_ENABLED) &&
> @@ -11188,6 +11252,8 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
>   	return 0;
>   
>   	/* Unwind what we've done if something failed in the setup */
> +err_devlink_register:
> +	i40e_stop_misc_vector(pf);
>   err_vsis:
>   	set_bit(__I40E_DOWN, &pf->state);
>   	i40e_clear_interrupt_scheme(pf);
> @@ -11205,8 +11271,8 @@ err_adminq_setup:
>   err_pf_reset:
>   	iounmap(hw->hw_addr);
>   err_ioremap:
> -	kfree(pf);
> -err_pf_alloc:
> +	devlink_free(devlink);
> +err_devlink_alloc:
>   	pci_disable_pcie_error_reporting(pdev);
>   	pci_release_selected_regions(pdev,
>   				     pci_select_bars(pdev, IORESOURCE_MEM));
> @@ -11229,9 +11295,11 @@ static void i40e_remove(struct pci_dev *pdev)
>   {
>   	struct i40e_pf *pf = pci_get_drvdata(pdev);
>   	struct i40e_hw *hw = &pf->hw;
> +	struct devlink *devlink = priv_to_devlink(pf);
>   	i40e_status ret_code;
>   	int i;
>   
> +	devlink_unregister(devlink);
>   	i40e_dbg_pf_exit(pf);
>   
>   	i40e_ptp_stop(pf);
> @@ -11319,7 +11387,7 @@ static void i40e_remove(struct pci_dev *pdev)
>   	kfree(pf->vsi);
>   
>   	iounmap(hw->hw_addr);
> -	kfree(pf);
> +	devlink_free(devlink);
>   	pci_release_selected_regions(pdev,
>   				     pci_select_bars(pdev, IORESOURCE_MEM));
>   


^ permalink raw reply	[flat|nested] 18+ messages in thread

* [Intel-wired-lan] [next PATCH 2/5] i40e: Introduce VF representor/control netdevs.
  2016-08-04 16:49 ` [Intel-wired-lan] [next PATCH 2/5] i40e: Introduce VF representor/control netdevs Sridhar Samudrala
@ 2016-08-10  0:18   ` Nambiar, Amritha
  0 siblings, 0 replies; 18+ messages in thread
From: Nambiar, Amritha @ 2016-08-10  0:18 UTC (permalink / raw)
  To: intel-wired-lan

On 8/4/2016 9:49 AM, Sridhar Samudrala wrote:
> VF representor/control netdevs are created for each VF if the switch mode
> is set to 'switchdev'. These netdevs can be used to control and configure
> VFs from PFs namespace. They enable exposing VF statistics, configuring
> ntuple filters, additional mac/vlans to VFs etc.
>
> Sample script to create VF representors
>      # devlink dev set pci/0000:05:00.0 smode switchdev
>      # echo 2 > /sys/class/net/enp5s0f0/device/sriov_numvfs
>      # ip l show
>      297: enp5s0f0: <BROADCAST,MULTICAST> mtu 1500 qdisc noop portid 6805ca2e7268 state DOWN mode DEFAULT group default qlen 1000
>      link/ether 68:05:ca:2e:72:68 brd ff:ff:ff:ff:ff:ff
>      vf 0 MAC 00:00:00:00:00:00, spoof checking on, link-state auto, trust off
>      vf 1 MAC 00:00:00:00:00:00, spoof checking on, link-state auto, trust off
>      299: enp5s0f0-vf0: <BROADCAST,MULTICAST> mtu 1500 qdisc noop state DOWN mode DEFAULT group default qlen 1000
>      link/ether 00:00:00:00:00:00 brd ff:ff:ff:ff:ff:ff
>      300: enp5s0f0-vf1: <BROADCAST,MULTICAST> mtu 1500 qdisc noop state DOWN mode DEFAULT group default qlen 1000
>      link/ether 00:00:00:00:00:00 brd ff:ff:ff:ff:ff:ff
>
> Signed-off-by: Sridhar Samudrala <sridhar.samudrala@intel.com>

  Acked-by: Amritha Nambiar <amritha.nambiar@intel.com>

> ---
>   drivers/net/ethernet/intel/i40e/i40e_main.c        | 13 +++-
>   drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c | 90 ++++++++++++++++++++++
>   drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h | 14 ++++
>   3 files changed, 116 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
> index bb44f09..208da9f 100644
> --- a/drivers/net/ethernet/intel/i40e/i40e_main.c
> +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
> @@ -10700,14 +10700,25 @@ static int i40e_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode)
>   static int i40e_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode)
>   {
>   	struct i40e_pf *pf = devlink_priv(devlink);
> -	int err = 0;
> +	struct i40e_vf *vf;
> +	int i, err = 0;
>   
>   	if (mode == pf->eswitch_mode)
>   		goto done;
>   
>   	switch (mode) {
>   	case DEVLINK_ESWITCH_MODE_LEGACY:
> +		for (i = 0; i < pf->num_alloc_vfs; i++) {
> +			vf = &(pf->vf[i]);
> +			i40e_free_vf_netdev(vf);
> +		}
> +		pf->eswitch_mode = mode;
> +		break;
>   	case DEVLINK_ESWITCH_MODE_SWITCHDEV:
> +		for (i = 0; i < pf->num_alloc_vfs; i++) {
> +			vf = &(pf->vf[i]);
> +			i40e_alloc_vf_netdev(vf, i);
> +		}
>   		pf->eswitch_mode = mode;
>   		break;
>   	default:
> diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
> index 4bd66d7..d41c37c 100644
> --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
> +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
> @@ -1000,6 +1000,90 @@ complete_reset:
>   	clear_bit(__I40E_VF_DISABLE, &pf->state);
>   }
>   
> +static int i40e_vf_netdev_open(struct net_device *dev)
> +{
> +	return 0;
> +}
> +
> +static int i40e_vf_netdev_stop(struct net_device *dev)
> +{
> +	return 0;
> +}
> +
> +static const struct net_device_ops i40e_vf_netdev_ops = {
> +	.ndo_open       = i40e_vf_netdev_open,
> +	.ndo_stop       = i40e_vf_netdev_stop,
> +};
> +
> +/**
> + * i40e_alloc_vf_netdev
> + * @vf: pointer to the VF structure
> + * @vf_num: VF number
> + *
> + * Create VF representor/control netdev
> + **/
> +int i40e_alloc_vf_netdev(struct i40e_vf *vf, u16 vf_num)
> +{
> +	struct net_device *netdev;
> +	char netdev_name[IFNAMSIZ];
> +	struct i40e_vf_netdev_priv *priv;
> +	struct i40e_pf *pf = vf->pf;
> +	struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi];
> +	int err;
> +
> +	snprintf(netdev_name, IFNAMSIZ, "%s-vf%d", vsi->netdev->name, vf_num);
> +	netdev = alloc_netdev(sizeof(struct i40e_vf_netdev_priv), netdev_name,
> +			      NET_NAME_UNKNOWN, ether_setup);
> +	if (!netdev) {
> +		dev_err(&pf->pdev->dev, "alloc_netdev failed for vf:%d\n",
> +			vf_num);
> +		return -ENOMEM;
> +	}
> +
> +	pf->vf[vf_num].ctrl_netdev = netdev;
> +
> +	priv = netdev_priv(netdev);
> +	priv->vf = &(pf->vf[vf_num]);
> +
> +	netdev->netdev_ops = &i40e_vf_netdev_ops;
> +
> +	netif_carrier_off(netdev);
> +	netif_tx_disable(netdev);
> +
> +	err = register_netdev(netdev);
> +	if (err) {
> +		dev_err(&pf->pdev->dev, "register_netdev failed for vf: %s\n",
> +			vf->ctrl_netdev->name);
> +		free_netdev(netdev);
> +		return err;
> +	}
> +
> +	dev_info(&pf->pdev->dev, "VF representor(%s) created for VF %d\n",
> +		 vf->ctrl_netdev->name, vf_num);
> +
> +	return 0;
> +}
> +
> +/**
> + * i40e_free_vf_netdev
> + * @vf: pointer to the VF structure
> + *
> + * Free VF representor/control netdev
> + **/
> +void i40e_free_vf_netdev(struct i40e_vf *vf)
> +{
> +	struct i40e_pf *pf = vf->pf;
> +
> +	if (!vf->ctrl_netdev)
> +		return;
> +
> +	dev_info(&pf->pdev->dev, "Freeing VF representor(%s)\n",
> +		 vf->ctrl_netdev->name);
> +
> +	unregister_netdev(vf->ctrl_netdev);
> +	free_netdev(vf->ctrl_netdev);
> +}
> +
>   /**
>    * i40e_free_vfs
>    * @pf: pointer to the PF structure
> @@ -1042,6 +1126,9 @@ void i40e_free_vfs(struct i40e_pf *pf)
>   			i40e_free_vf_res(&pf->vf[i]);
>   		/* disable qp mappings */
>   		i40e_disable_vf_mappings(&pf->vf[i]);
> +
> +		if (pf->eswitch_mode == DEVLINK_ESWITCH_MODE_SWITCHDEV)
> +			i40e_free_vf_netdev(&pf->vf[i]);
>   	}
>   
>   	kfree(pf->vf);
> @@ -1110,6 +1197,9 @@ int i40e_alloc_vfs(struct i40e_pf *pf, u16 num_alloc_vfs)
>   		/* VF resources get allocated during reset */
>   		i40e_reset_vf(&vfs[i], false);
>   
> +		if (pf->eswitch_mode == DEVLINK_ESWITCH_MODE_SWITCHDEV)
> +			i40e_alloc_vf_netdev(&vfs[i], i);
> +
>   	}
>   	pf->num_alloc_vfs = num_alloc_vfs;
>   
> diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h
> index 8751741..8446547 100644
> --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h
> +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h
> @@ -72,10 +72,21 @@ enum i40e_vf_capabilities {
>   	I40E_VIRTCHNL_VF_CAP_IWARP,
>   };
>   
> +/* VF Ctrl netdev private structure */
> +struct i40e_vf_netdev_priv {
> +	struct i40e_vf *vf;
> +};
> +
>   /* VF information structure */
>   struct i40e_vf {
>   	struct i40e_pf *pf;
>   
> +	/* VF representor netdev that allows control and configuration of
> +	 * VFs from the host. Enables returning VF stats, configuring ntuple
> +	 * filters on VFs, adding multiple mac/vlans to VFs etc.
> +	 */
> +	struct net_device *ctrl_netdev;
> +
>   	/* VF id in the PF space */
>   	s16 vf_id;
>   	/* all VF vsis connect to the same parent */
> @@ -142,4 +153,7 @@ int i40e_ndo_set_vf_spoofchk(struct net_device *netdev, int vf_id, bool enable);
>   void i40e_vc_notify_link_state(struct i40e_pf *pf);
>   void i40e_vc_notify_reset(struct i40e_pf *pf);
>   
> +int i40e_alloc_vf_netdev(struct i40e_vf *vf, u16 vf_num);
> +void i40e_free_vf_netdev(struct i40e_vf *vf);
> +
>   #endif /* _I40E_VIRTCHNL_PF_H_ */


^ permalink raw reply	[flat|nested] 18+ messages in thread

* [Intel-wired-lan] [next PATCH 3/5] i40e: Enable VF specific ethtool statistics via VF representor netdevs.
  2016-08-04 16:49 ` [Intel-wired-lan] [next PATCH 3/5] i40e: Enable VF specific ethtool statistics via VF representor netdevs Sridhar Samudrala
@ 2016-08-10  0:21   ` Nambiar, Amritha
  0 siblings, 0 replies; 18+ messages in thread
From: Nambiar, Amritha @ 2016-08-10  0:21 UTC (permalink / raw)
  To: intel-wired-lan

On 8/4/2016 9:49 AM, Sridhar Samudrala wrote:
> Sample script that shows ethtool stats on VF representor netdev
> PF: enp5s0f0, VF0: enp5s2  VF_REP0: enp5s0f0-vf0
>
>     # devlink dev set pci/0000:05:00.0 smode switchdev
>     # echo 2 > /sys/class/net/enp5s0f0/device/sriov_numvfs
>     # ip link set enp5s2 up
>     # ethtool -S enp5s0f0-vf0
>     NIC statistics:
>       rx_bytes: 0
>       rx_unicast: 0
>       rx_multicast: 0
>       rx_broadcast: 0
>       rx_discards: 0
>       rx_unknown_protocol: 0
>       tx_bytes: 140
>       tx_unicast: 0
>       tx_multicast: 2
>       tx_broadcast: 0
>       tx_discards: 0
>       tx_errors: 0
>
> Signed-off-by: Sridhar Samudrala <sridhar.samudrala@intel.com>

  Acked-by: Amritha Nambiar <amritha.nambiar@intel.com>

> ---
>   drivers/net/ethernet/intel/i40e/i40e.h             |  1 +
>   drivers/net/ethernet/intel/i40e/i40e_ethtool.c     | 67 ++++++++++++++++++++++
>   drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c |  1 +
>   3 files changed, 69 insertions(+)
>
> diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h
> index 724bd82..2d35c0d 100644
> --- a/drivers/net/ethernet/intel/i40e/i40e.h
> +++ b/drivers/net/ethernet/intel/i40e/i40e.h
> @@ -875,4 +875,5 @@ i40e_status i40e_get_npar_bw_setting(struct i40e_pf *pf);
>   i40e_status i40e_set_npar_bw_setting(struct i40e_pf *pf);
>   i40e_status i40e_commit_npar_bw_setting(struct i40e_pf *pf);
>   void i40e_print_link_message(struct i40e_vsi *vsi, bool isup);
> +void i40e_set_vf_netdev_ethtool_ops(struct net_device *netdev);
>   #endif /* _I40E_H_ */
> diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
> index c912e04..0c3bffa 100644
> --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
> +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
> @@ -3040,3 +3040,70 @@ void i40e_set_ethtool_ops(struct net_device *netdev)
>   {
>   	netdev->ethtool_ops = &i40e_ethtool_ops;
>   }
> +
> +static const char i40e_vf_netdev_ethtool_sset[][ETH_GSTRING_LEN] = {
> +	"rx_bytes",
> +	"rx_unicast",
> +	"rx_multicast",
> +	"rx_broadcast",
> +	"rx_discards",
> +	"rx_unknown_protocol",
> +	"tx_bytes",
> +	"tx_unicast",
> +	"tx_multicast",
> +	"tx_broadcast",
> +	"tx_discards",
> +	"tx_errors",
> +};
> +
> +#define I40E_VF_NETDEV_ETHTOOL_STAT_COUNT \
> +			ARRAY_SIZE(i40e_vf_netdev_ethtool_sset)
> +
> +static void i40e_vf_netdev_ethtool_get_strings(struct net_device *dev,
> +					       u32 stringset,
> +					       u8 *ethtool_strings)
> +{
> +	switch (stringset) {
> +	case ETH_SS_STATS:
> +		memcpy(ethtool_strings, &i40e_vf_netdev_ethtool_sset,
> +		       sizeof(i40e_vf_netdev_ethtool_sset));
> +		break;
> +	}
> +}
> +
> +static int i40e_vf_netdev_ethtool_get_sset_count(struct net_device *dev,
> +						 int stringset)
> +{
> +	switch (stringset) {
> +	case ETH_SS_STATS:
> +		return I40E_VF_NETDEV_ETHTOOL_STAT_COUNT;
> +	default:
> +		return -EOPNOTSUPP;
> +	}
> +}
> +
> +static void i40e_vf_netdev_ethtool_get_stats(struct net_device *dev,
> +				struct ethtool_stats *target_ethtool_stats,
> +				u64 *target_stat_values)
> +{
> +	struct i40e_vf_netdev_priv *priv = netdev_priv(dev);
> +	struct i40e_vf *vf = priv->vf;
> +	struct i40e_pf *pf = vf->pf;
> +	struct i40e_vsi *vsi;
> +
> +	vsi = pf->vsi[vf->lan_vsi_idx];
> +	i40e_update_stats(vsi);
> +	memcpy(target_stat_values, &vsi->eth_stats,
> +	       I40E_VF_NETDEV_ETHTOOL_STAT_COUNT * 8);
> +}
> +
> +static const struct ethtool_ops i40e_vf_netdev_ethtool_ops = {
> +	.get_strings		= i40e_vf_netdev_ethtool_get_strings,
> +	.get_ethtool_stats      = i40e_vf_netdev_ethtool_get_stats,
> +	.get_sset_count         = i40e_vf_netdev_ethtool_get_sset_count,
> +};
> +
> +void i40e_set_vf_netdev_ethtool_ops(struct net_device *netdev)
> +{
> +	netdev->ethtool_ops = &i40e_vf_netdev_ethtool_ops;
> +}
> diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
> index d41c37c..313a53c 100644
> --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
> +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
> @@ -1046,6 +1046,7 @@ int i40e_alloc_vf_netdev(struct i40e_vf *vf, u16 vf_num)
>   	priv->vf = &(pf->vf[vf_num]);
>   
>   	netdev->netdev_ops = &i40e_vf_netdev_ops;
> +	i40e_set_vf_netdev_ethtool_ops(netdev);
>   
>   	netif_carrier_off(netdev);
>   	netif_tx_disable(netdev);


^ permalink raw reply	[flat|nested] 18+ messages in thread

* [Intel-wired-lan] [next PATCH 1/5] i40e: Introduce devlink interface.
  2016-08-04 16:49 [Intel-wired-lan] [next PATCH 1/5] i40e: Introduce devlink interface Sridhar Samudrala
                   ` (4 preceding siblings ...)
  2016-08-10  0:12 ` [Intel-wired-lan] [next PATCH 1/5] i40e: Introduce devlink interface Nambiar, Amritha
@ 2016-08-10 16:01 ` Alexander Duyck
  2016-08-10 16:50   ` John Fastabend
  2016-08-10 20:50 ` Jeff Kirsher
  6 siblings, 1 reply; 18+ messages in thread
From: Alexander Duyck @ 2016-08-10 16:01 UTC (permalink / raw)
  To: intel-wired-lan

On Thu, Aug 4, 2016 at 9:49 AM, Sridhar Samudrala
<sridhar.samudrala@intel.com> wrote:
> Add initial devlink support to set/get the mode of SRIOV switch.
> This patch allows the mode to be set to either 'legacy' or 'switchdev', but
> doesn't implement any functionality to create vf representors in switchdev
> mode.
>
> With smode support in iproute2 'devlink' utility, switch mode can be set
> and get via following commands.
>
>     # devlink dev smode pci/0000:05:00.0
>     mode: legacy
>     # devlink dev set pci/0000:05:00.0 smode switchdev
>     # devlink dev smode pci/0000:05:00.0
>     mode: switchdev
>
> Signed-off-by: Sridhar Samudrala <sridhar.samudrala@intel.com>

I really don't see much value in this patch.  If you are going to
support SwitchDev then just do it.  Otherwise you are adding extra
overhead for maintaining two different modes.

I would recommend putting this series out to netdev as an RFC.
Submitting it to intel-wired-lan is kind of pointless as the audience
it to small to get any valuable review.

- Alex

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [Intel-wired-lan] [next PATCH 1/5] i40e: Introduce devlink interface.
  2016-08-10 16:01 ` Alexander Duyck
@ 2016-08-10 16:50   ` John Fastabend
  2016-08-10 18:18     ` Alexander Duyck
  0 siblings, 1 reply; 18+ messages in thread
From: John Fastabend @ 2016-08-10 16:50 UTC (permalink / raw)
  To: intel-wired-lan

On 16-08-10 09:01 AM, Alexander Duyck wrote:
> On Thu, Aug 4, 2016 at 9:49 AM, Sridhar Samudrala
> <sridhar.samudrala@intel.com> wrote:
>> Add initial devlink support to set/get the mode of SRIOV switch.
>> This patch allows the mode to be set to either 'legacy' or 'switchdev', but
>> doesn't implement any functionality to create vf representors in switchdev
>> mode.
>>
>> With smode support in iproute2 'devlink' utility, switch mode can be set
>> and get via following commands.
>>
>>     # devlink dev smode pci/0000:05:00.0
>>     mode: legacy
>>     # devlink dev set pci/0000:05:00.0 smode switchdev
>>     # devlink dev smode pci/0000:05:00.0
>>     mode: switchdev
>>
>> Signed-off-by: Sridhar Samudrala <sridhar.samudrala@intel.com>
> 
> I really don't see much value in this patch.  If you are going to
> support SwitchDev then just do it.  Otherwise you are adding extra
> overhead for maintaining two different modes.
> 
> I would recommend putting this series out to netdev as an RFC.
> Submitting it to intel-wired-lan is kind of pointless as the audience
> it to small to get any valuable review.
> 
> - Alex

I argued at length about this already. Jiri and company wanted this flag
to push device in and out of this mode. Here we are just following the
already upstreamed and debated decision.

This is less about switchdev and more about generating VF netdevs to
use with ip tools and friends.

Another option would be to just always enable VF netdevs and have no
legacy mode at all. I think that would be fine it just depends on if
you think having extra netdevs around will confuse the stack at all.
It might create a few corner cases but one reasonable thing to do
would be to just fix those cases as they appear.

Thanks,
John


^ permalink raw reply	[flat|nested] 18+ messages in thread

* [Intel-wired-lan] [next PATCH 1/5] i40e: Introduce devlink interface.
  2016-08-10 16:50   ` John Fastabend
@ 2016-08-10 18:18     ` Alexander Duyck
  2016-08-10 18:41       ` John Fastabend
  0 siblings, 1 reply; 18+ messages in thread
From: Alexander Duyck @ 2016-08-10 18:18 UTC (permalink / raw)
  To: intel-wired-lan

On Wed, Aug 10, 2016 at 9:50 AM, John Fastabend
<john.fastabend@gmail.com> wrote:
> On 16-08-10 09:01 AM, Alexander Duyck wrote:
>> On Thu, Aug 4, 2016 at 9:49 AM, Sridhar Samudrala
>> <sridhar.samudrala@intel.com> wrote:
>>> Add initial devlink support to set/get the mode of SRIOV switch.
>>> This patch allows the mode to be set to either 'legacy' or 'switchdev', but
>>> doesn't implement any functionality to create vf representors in switchdev
>>> mode.
>>>
>>> With smode support in iproute2 'devlink' utility, switch mode can be set
>>> and get via following commands.
>>>
>>>     # devlink dev smode pci/0000:05:00.0
>>>     mode: legacy
>>>     # devlink dev set pci/0000:05:00.0 smode switchdev
>>>     # devlink dev smode pci/0000:05:00.0
>>>     mode: switchdev
>>>
>>> Signed-off-by: Sridhar Samudrala <sridhar.samudrala@intel.com>
>>
>> I really don't see much value in this patch.  If you are going to
>> support SwitchDev then just do it.  Otherwise you are adding extra
>> overhead for maintaining two different modes.
>>
>> I would recommend putting this series out to netdev as an RFC.
>> Submitting it to intel-wired-lan is kind of pointless as the audience
>> it to small to get any valuable review.
>>
>> - Alex
>
> I argued at length about this already. Jiri and company wanted this flag
> to push device in and out of this mode. Here we are just following the
> already upstreamed and debated decision.

Yeah, I started doing more research after reviewing this patch.  I see
the idea behind it.  I think the issue if anything is that it seems
like things are a bit backwards.  We probably should be enabling the
SwitchDev bits first and then working on adding devlink knobs to
disable things later.

> This is less about switchdev and more about generating VF netdevs to
> use with ip tools and friends.

Right.  One of the issues I have with this patch set is that it seems
to get things backwards.  They are making VFs appear that don't do
much of anything and then trying to bolt on features after the fact.
We probably need to focus on enabling the VF representation, and then
providing the ability to switch them on and off.  Also I would argue
that we should actually be enabling switch features such as FDB
entries instead of trying to bolt on stuff like flow director which
doesn't really apply to very many switches and isn't as likely to be
used on a switch port.

> Another option would be to just always enable VF netdevs and have no
> legacy mode at all. I think that would be fine it just depends on if
> you think having extra netdevs around will confuse the stack at all.
> It might create a few corner cases but one reasonable thing to do
> would be to just fix those cases as they appear.

I'd say we are better off starting out with them just enabled and then
enabling the option to disable them after the fact.  If we are going
to have this extra code floating around we should be defaulting it to
enabled so that it is more likely to be used.  The legacy option
should only really be there so we can turn this off if we don't want
it.

- Alex

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [Intel-wired-lan] [next PATCH 1/5] i40e: Introduce devlink interface.
  2016-08-10 18:18     ` Alexander Duyck
@ 2016-08-10 18:41       ` John Fastabend
  2016-08-16 18:54         ` Samudrala, Sridhar
  0 siblings, 1 reply; 18+ messages in thread
From: John Fastabend @ 2016-08-10 18:41 UTC (permalink / raw)
  To: intel-wired-lan

On 16-08-10 11:18 AM, Alexander Duyck wrote:
> On Wed, Aug 10, 2016 at 9:50 AM, John Fastabend
> <john.fastabend@gmail.com> wrote:
>> On 16-08-10 09:01 AM, Alexander Duyck wrote:
>>> On Thu, Aug 4, 2016 at 9:49 AM, Sridhar Samudrala
>>> <sridhar.samudrala@intel.com> wrote:
>>>> Add initial devlink support to set/get the mode of SRIOV switch.
>>>> This patch allows the mode to be set to either 'legacy' or 'switchdev', but
>>>> doesn't implement any functionality to create vf representors in switchdev
>>>> mode.
>>>>
>>>> With smode support in iproute2 'devlink' utility, switch mode can be set
>>>> and get via following commands.
>>>>
>>>>     # devlink dev smode pci/0000:05:00.0
>>>>     mode: legacy
>>>>     # devlink dev set pci/0000:05:00.0 smode switchdev
>>>>     # devlink dev smode pci/0000:05:00.0
>>>>     mode: switchdev
>>>>
>>>> Signed-off-by: Sridhar Samudrala <sridhar.samudrala@intel.com>
>>>
>>> I really don't see much value in this patch.  If you are going to
>>> support SwitchDev then just do it.  Otherwise you are adding extra
>>> overhead for maintaining two different modes.
>>>
>>> I would recommend putting this series out to netdev as an RFC.
>>> Submitting it to intel-wired-lan is kind of pointless as the audience
>>> it to small to get any valuable review.
>>>
>>> - Alex
>>
>> I argued at length about this already. Jiri and company wanted this flag
>> to push device in and out of this mode. Here we are just following the
>> already upstreamed and debated decision.
> 
> Yeah, I started doing more research after reviewing this patch.  I see
> the idea behind it.  I think the issue if anything is that it seems
> like things are a bit backwards.  We probably should be enabling the
> SwitchDev bits first and then working on adding devlink knobs to
> disable things later.
> 

Sure, although its not clear to me exactly which switchdev bits are
useful for an edge NIC like this. Getting switch ids is one thing
that will become useful when we enable multiple bridges.

Otherwise I don't see what l2 switchdev blocks are useful vs
just using the standard ndo op interfaces already in place when
working on a device without learning/aging/etc. The one thing
that I've never bothered to add is pushing "learned" rules down
into the hardware but I'm not convinced for most use cases this
is particularly interesting because you _should_ know in a managed
system what MAC addresses a VM/container/etc is allowed to use
ahead of time via libvirt or other mgmt stack. I haven't tested
the VLAN handling though so that needs to be looked at.

And l3 switchdev routing may be interesting but its fairly
low on my priority list unless someone is really excited about it.

>> This is less about switchdev and more about generating VF netdevs to
>> use with ip tools and friends.
> 
> Right.  One of the issues I have with this patch set is that it seems
> to get things backwards.  They are making VFs appear that don't do
> much of anything and then trying to bolt on features after the fact.
> We probably need to focus on enabling the VF representation, and then
> providing the ability to switch them on and off.  Also I would argue
> that we should actually be enabling switch features such as FDB
> entries instead of trying to bolt on stuff like flow director which
> doesn't really apply to very many switches and isn't as likely to be
> used on a switch port.

Fair enough. Organizing the patches better seems OK to me. I plan to
use the 'tc' offloaded mechanisms not the ethtool flow director
interface for virtual switch offloads.

> 
>> Another option would be to just always enable VF netdevs and have no
>> legacy mode at all. I think that would be fine it just depends on if
>> you think having extra netdevs around will confuse the stack at all.
>> It might create a few corner cases but one reasonable thing to do
>> would be to just fix those cases as they appear.
> 
> I'd say we are better off starting out with them just enabled and then
> enabling the option to disable them after the fact.  If we are going
> to have this extra code floating around we should be defaulting it to
> enabled so that it is more likely to be used.  The legacy option
> should only really be there so we can turn this off if we don't want
> it.
> 

Works for me.


> - Alex
> 


^ permalink raw reply	[flat|nested] 18+ messages in thread

* [Intel-wired-lan] [next PATCH 1/5] i40e: Introduce devlink interface.
  2016-08-04 16:49 [Intel-wired-lan] [next PATCH 1/5] i40e: Introduce devlink interface Sridhar Samudrala
                   ` (5 preceding siblings ...)
  2016-08-10 16:01 ` Alexander Duyck
@ 2016-08-10 20:50 ` Jeff Kirsher
  6 siblings, 0 replies; 18+ messages in thread
From: Jeff Kirsher @ 2016-08-10 20:50 UTC (permalink / raw)
  To: intel-wired-lan

On Thu, 2016-08-04 at 18:49 +0200, Sridhar Samudrala wrote:
> 
> With smode support in iproute2 'devlink' utility, switch mode can be set
> and get via following commands.
> 
> ??? # devlink dev smode pci/0000:05:00.0
> ??? mode: legacy
> ??? # devlink dev set pci/0000:05:00.0 smode switchdev
> ??? # devlink dev smode pci/0000:05:00.0
> ??? mode: switchdev
> 
> Signed-off-by: Sridhar Samudrala <sridhar.samudrala@intel.com>
> ---
> ?drivers/net/ethernet/intel/Kconfig????????? |? 1 +
> ?drivers/net/ethernet/intel/i40e/i40e.h????? |? 3 ++
> ?drivers/net/ethernet/intel/i40e/i40e_main.c | 84
> ++++++++++++++++++++++++++---
> ?3 files changed, 80 insertions(+), 8 deletions(-)

Since there was no title patch to this series, I am using patch 1 of the
series to address the whole series.

Since Alex has brought up some good concerns/comments that should be
addressed (and the fact that this series does not apply cleanly to my
current tree), I am dropping this series. ?I will await a v2 where Alex's
comments are addressed and the series is rebased against my current dev-
queue branch.
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 819 bytes
Desc: This is a digitally signed message part
URL: <http://lists.osuosl.org/pipermail/intel-wired-lan/attachments/20160810/8079a75b/attachment.asc>

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [Intel-wired-lan] [next PATCH 1/5] i40e: Introduce devlink interface.
  2016-08-10 18:41       ` John Fastabend
@ 2016-08-16 18:54         ` Samudrala, Sridhar
  2016-08-16 20:39           ` Alexander Duyck
  0 siblings, 1 reply; 18+ messages in thread
From: Samudrala, Sridhar @ 2016-08-16 18:54 UTC (permalink / raw)
  To: intel-wired-lan

On 8/10/2016 11:41 AM, John Fastabend wrote:
> On 16-08-10 11:18 AM, Alexander Duyck wrote:
>> On Wed, Aug 10, 2016 at 9:50 AM, John Fastabend
>> <john.fastabend@gmail.com> wrote:
>>> On 16-08-10 09:01 AM, Alexander Duyck wrote:
>>>> On Thu, Aug 4, 2016 at 9:49 AM, Sridhar Samudrala
>>>> <sridhar.samudrala@intel.com> wrote:
>>>>> Add initial devlink support to set/get the mode of SRIOV switch.
>>>>> This patch allows the mode to be set to either 'legacy' or 'switchdev', but
>>>>> doesn't implement any functionality to create vf representors in switchdev
>>>>> mode.
>>>>>
>>>>> With smode support in iproute2 'devlink' utility, switch mode can be set
>>>>> and get via following commands.
>>>>>
>>>>>      # devlink dev smode pci/0000:05:00.0
>>>>>      mode: legacy
>>>>>      # devlink dev set pci/0000:05:00.0 smode switchdev
>>>>>      # devlink dev smode pci/0000:05:00.0
>>>>>      mode: switchdev
>>>>>
>>>>> Signed-off-by: Sridhar Samudrala <sridhar.samudrala@intel.com>
>>>> I really don't see much value in this patch.  If you are going to
>>>> support SwitchDev then just do it.  Otherwise you are adding extra
>>>> overhead for maintaining two different modes.
>>>>
>>>> I would recommend putting this series out to netdev as an RFC.
>>>> Submitting it to intel-wired-lan is kind of pointless as the audience
>>>> it to small to get any valuable review.

As these patches have only changes to i40e driver, i didn't include 
netdev.  Will do so when
i submit v2.

>>>>
>>>> - Alex
>>> I argued at length about this already. Jiri and company wanted this flag
>>> to push device in and out of this mode. Here we are just following the
>>> already upstreamed and debated decision.
>> Yeah, I started doing more research after reviewing this patch.  I see
>> the idea behind it.  I think the issue if anything is that it seems
>> like things are a bit backwards.  We probably should be enabling the
>> SwitchDev bits first and then working on adding devlink knobs to
>> disable things later.
>>
> Sure, although its not clear to me exactly which switchdev bits are
> useful for an edge NIC like this. Getting switch ids is one thing
> that will become useful when we enable multiple bridges.

This patchset is not really adding any switchdev ops.  We are calling 
the mode in which the
PF driver creates VF representors as SWITCHDEV mode (this is based on 
the netdev
discussion of mellanox patches).

Amritha has a patch to add switchdev ops to VF representor netdevs that 
enables returning
switch id via switchdev_port_attr_get() op.
>
> Otherwise I don't see what l2 switchdev blocks are useful vs
> just using the standard ndo op interfaces already in place when
> working on a device without learning/aging/etc. The one thing
> that I've never bothered to add is pushing "learned" rules down
> into the hardware but I'm not convinced for most use cases this
> is particularly interesting because you _should_ know in a managed
> system what MAC addresses a VM/container/etc is allowed to use
> ahead of time via libvirt or other mgmt stack. I haven't tested
> the VLAN handling though so that needs to be looked at.
>
> And l3 switchdev routing may be interesting but its fairly
> low on my priority list unless someone is really excited about it.
>
>>> This is less about switchdev and more about generating VF netdevs to
>>> use with ip tools and friends.
>> Right.  One of the issues I have with this patch set is that it seems
>> to get things backwards.  They are making VFs appear that don't do
>> much of anything and then trying to bolt on features after the fact.
>> We probably need to focus on enabling the VF representation, and then
>> providing the ability to switch them on and off.  Also I would argue
>> that we should actually be enabling switch features such as FDB
>> entries instead of trying to bolt on stuff like flow director which
>> doesn't really apply to very many switches and isn't as likely to be
>> used on a switch port.
> Fair enough. Organizing the patches better seems OK to me. I plan to
> use the 'tc' offloaded mechanisms not the ethtool flow director
> interface for virtual switch offloads.
>
>>> Another option would be to just always enable VF netdevs and have no
>>> legacy mode at all. I think that would be fine it just depends on if
>>> you think having extra netdevs around will confuse the stack at all.
>>> It might create a few corner cases but one reasonable thing to do
>>> would be to just fix those cases as they appear.
>> I'd say we are better off starting out with them just enabled and then
>> enabling the option to disable them after the fact.  If we are going
>> to have this extra code floating around we should be defaulting it to
>> enabled so that it is more likely to be used.  The legacy option
>> should only really be there so we can turn this off if we don't want
>> it.
>>
> Works for me.
>

OK.  If we all agree that 'creating VF netdevs' by default is the right 
way to go,  I will rearrange the
patchset in the following order.
    - Introduce VF representor netdevs (create VF netdevs by default 
when sr-iov VFs are enabled)
    - enable ethtool stats on VF representors
    - enable ntuple filters on VF representors
    - introduce devlink to enable the switch mode to be changed to 
'legacy' (no VF netdevs)

Thanks
Sridhar



^ permalink raw reply	[flat|nested] 18+ messages in thread

* [Intel-wired-lan] [next PATCH 1/5] i40e: Introduce devlink interface.
  2016-08-16 18:54         ` Samudrala, Sridhar
@ 2016-08-16 20:39           ` Alexander Duyck
  2016-08-16 21:51             ` Samudrala, Sridhar
  0 siblings, 1 reply; 18+ messages in thread
From: Alexander Duyck @ 2016-08-16 20:39 UTC (permalink / raw)
  To: intel-wired-lan

On Tue, Aug 16, 2016 at 11:54 AM, Samudrala, Sridhar
<sridhar.samudrala@intel.com> wrote:
> On 8/10/2016 11:41 AM, John Fastabend wrote:
>>
>> On 16-08-10 11:18 AM, Alexander Duyck wrote:
>>>
>>> On Wed, Aug 10, 2016 at 9:50 AM, John Fastabend
>>> <john.fastabend@gmail.com> wrote:
>>>>
>>>> On 16-08-10 09:01 AM, Alexander Duyck wrote:
>>>>>
>>>>> On Thu, Aug 4, 2016 at 9:49 AM, Sridhar Samudrala
>>>>> <sridhar.samudrala@intel.com> wrote:
>>>>>>
>>>>>> Add initial devlink support to set/get the mode of SRIOV switch.
>>>>>> This patch allows the mode to be set to either 'legacy' or
>>>>>> 'switchdev', but
>>>>>> doesn't implement any functionality to create vf representors in
>>>>>> switchdev
>>>>>> mode.
>>>>>>
>>>>>> With smode support in iproute2 'devlink' utility, switch mode can be
>>>>>> set
>>>>>> and get via following commands.
>>>>>>
>>>>>>      # devlink dev smode pci/0000:05:00.0
>>>>>>      mode: legacy
>>>>>>      # devlink dev set pci/0000:05:00.0 smode switchdev
>>>>>>      # devlink dev smode pci/0000:05:00.0
>>>>>>      mode: switchdev
>>>>>>
>>>>>> Signed-off-by: Sridhar Samudrala <sridhar.samudrala@intel.com>
>>>>>
>>>>> I really don't see much value in this patch.  If you are going to
>>>>> support SwitchDev then just do it.  Otherwise you are adding extra
>>>>> overhead for maintaining two different modes.
>>>>>
>>>>> I would recommend putting this series out to netdev as an RFC.
>>>>> Submitting it to intel-wired-lan is kind of pointless as the audience
>>>>> it to small to get any valuable review.
>
>
> As these patches have only changes to i40e driver, i didn't include netdev.
> Will do so when
> i submit v2.
>
>>>>>
>>>>> - Alex
>>>>
>>>> I argued at length about this already. Jiri and company wanted this flag
>>>> to push device in and out of this mode. Here we are just following the
>>>> already upstreamed and debated decision.
>>>
>>> Yeah, I started doing more research after reviewing this patch.  I see
>>> the idea behind it.  I think the issue if anything is that it seems
>>> like things are a bit backwards.  We probably should be enabling the
>>> SwitchDev bits first and then working on adding devlink knobs to
>>> disable things later.
>>>
>> Sure, although its not clear to me exactly which switchdev bits are
>> useful for an edge NIC like this. Getting switch ids is one thing
>> that will become useful when we enable multiple bridges.
>
>
> This patchset is not really adding any switchdev ops.  We are calling the
> mode in which the
> PF driver creates VF representors as SWITCHDEV mode (this is based on the
> netdev
> discussion of mellanox patches).
>
> Amritha has a patch to add switchdev ops to VF representor netdevs that
> enables returning
> switch id via switchdev_port_attr_get() op.
>
>>
>> Otherwise I don't see what l2 switchdev blocks are useful vs
>> just using the standard ndo op interfaces already in place when
>> working on a device without learning/aging/etc. The one thing
>> that I've never bothered to add is pushing "learned" rules down
>> into the hardware but I'm not convinced for most use cases this
>> is particularly interesting because you _should_ know in a managed
>> system what MAC addresses a VM/container/etc is allowed to use
>> ahead of time via libvirt or other mgmt stack. I haven't tested
>> the VLAN handling though so that needs to be looked at.
>>
>> And l3 switchdev routing may be interesting but its fairly
>> low on my priority list unless someone is really excited about it.
>>
>>>> This is less about switchdev and more about generating VF netdevs to
>>>> use with ip tools and friends.
>>>
>>> Right.  One of the issues I have with this patch set is that it seems
>>> to get things backwards.  They are making VFs appear that don't do
>>> much of anything and then trying to bolt on features after the fact.
>>> We probably need to focus on enabling the VF representation, and then
>>> providing the ability to switch them on and off.  Also I would argue
>>> that we should actually be enabling switch features such as FDB
>>> entries instead of trying to bolt on stuff like flow director which
>>> doesn't really apply to very many switches and isn't as likely to be
>>> used on a switch port.
>>
>> Fair enough. Organizing the patches better seems OK to me. I plan to
>> use the 'tc' offloaded mechanisms not the ethtool flow director
>> interface for virtual switch offloads.
>>
>>>> Another option would be to just always enable VF netdevs and have no
>>>> legacy mode at all. I think that would be fine it just depends on if
>>>> you think having extra netdevs around will confuse the stack at all.
>>>> It might create a few corner cases but one reasonable thing to do
>>>> would be to just fix those cases as they appear.
>>>
>>> I'd say we are better off starting out with them just enabled and then
>>> enabling the option to disable them after the fact.  If we are going
>>> to have this extra code floating around we should be defaulting it to
>>> enabled so that it is more likely to be used.  The legacy option
>>> should only really be there so we can turn this off if we don't want
>>> it.
>>>
>> Works for me.
>>
>
> OK.  If we all agree that 'creating VF netdevs' by default is the right way
> to go,  I will rearrange the
> patchset in the following order.
>    - Introduce VF representor netdevs (create VF netdevs by default when
> sr-iov VFs are enabled)
>    - enable ethtool stats on VF representors
>    - enable ntuple filters on VF representors

No NTUPLE filters.  This is not the way the community will want to do
this, and this is not how I want to do this.

The easy way to think of this is that NTUPLE is just an alias for Rx
network flow classification.  Now where are the VF representors
supposed to receive traffic from?  It is the Tx path of the VF.  So by
putting NTUPLE filters on the VF representors you are implying  you
can get in the Tx path for the VF for all traffic, and last I knew
that was not the case.  As such it makes no sense to do it this way.

>    - introduce devlink to enable the switch mode to be changed to 'legacy'
> (no VF netdevs)
>
> Thanks
> Sridhar

You have to think of the VF representors as switch ports.  They don't
actually represent the VFs.  So if you think about it a switch port
will only have a few real features.  You can control the VLAN
membership, tagged/untagged, what FDB entries are assigned to the
port, the link speed, if the link is up/down, etc.  So some of your
first steps might be to focus on the L1 and L2 type items.  So for
example you might see if we can do things like control the upper limit
of the MTU for the VF by controlling the MTU for the VF representor.
See if you can at least make it so that the MAC and VLAN values for
the VF show up as FDB entries for the VF representor and so forth.

- Alex

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [Intel-wired-lan] [next PATCH 1/5] i40e: Introduce devlink interface.
  2016-08-16 20:39           ` Alexander Duyck
@ 2016-08-16 21:51             ` Samudrala, Sridhar
  2016-08-17  0:17               ` Alexander Duyck
  0 siblings, 1 reply; 18+ messages in thread
From: Samudrala, Sridhar @ 2016-08-16 21:51 UTC (permalink / raw)
  To: intel-wired-lan

On 8/16/2016 1:39 PM, Alexander Duyck wrote:
> On Tue, Aug 16, 2016 at 11:54 AM, Samudrala, Sridhar
> <sridhar.samudrala@intel.com> wrote:
>> On 8/10/2016 11:41 AM, John Fastabend wrote:
>>> On 16-08-10 11:18 AM, Alexander Duyck wrote:
>>>> On Wed, Aug 10, 2016 at 9:50 AM, John Fastabend
>>>> <john.fastabend@gmail.com> wrote:
>>>>> On 16-08-10 09:01 AM, Alexander Duyck wrote:
>>>>>> On Thu, Aug 4, 2016 at 9:49 AM, Sridhar Samudrala
>>>>>> <sridhar.samudrala@intel.com> wrote:
>>>>>>> Add initial devlink support to set/get the mode of SRIOV switch.
>>>>>>> This patch allows the mode to be set to either 'legacy' or
>>>>>>> 'switchdev', but
>>>>>>> doesn't implement any functionality to create vf representors in
>>>>>>> switchdev
>>>>>>> mode.
>>>>>>>
>>>>>>> With smode support in iproute2 'devlink' utility, switch mode can be
>>>>>>> set
>>>>>>> and get via following commands.
>>>>>>>
>>>>>>>       # devlink dev smode pci/0000:05:00.0
>>>>>>>       mode: legacy
>>>>>>>       # devlink dev set pci/0000:05:00.0 smode switchdev
>>>>>>>       # devlink dev smode pci/0000:05:00.0
>>>>>>>       mode: switchdev
>>>>>>>
>>>>>>> Signed-off-by: Sridhar Samudrala <sridhar.samudrala@intel.com>
>>>>>> I really don't see much value in this patch.  If you are going to
>>>>>> support SwitchDev then just do it.  Otherwise you are adding extra
>>>>>> overhead for maintaining two different modes.
>>>>>>
>>>>>> I would recommend putting this series out to netdev as an RFC.
>>>>>> Submitting it to intel-wired-lan is kind of pointless as the audience
>>>>>> it to small to get any valuable review.
>>
>> As these patches have only changes to i40e driver, i didn't include netdev.
>> Will do so when
>> i submit v2.
>>
>>>>>> - Alex
>>>>> I argued at length about this already. Jiri and company wanted this flag
>>>>> to push device in and out of this mode. Here we are just following the
>>>>> already upstreamed and debated decision.
>>>> Yeah, I started doing more research after reviewing this patch.  I see
>>>> the idea behind it.  I think the issue if anything is that it seems
>>>> like things are a bit backwards.  We probably should be enabling the
>>>> SwitchDev bits first and then working on adding devlink knobs to
>>>> disable things later.
>>>>
>>> Sure, although its not clear to me exactly which switchdev bits are
>>> useful for an edge NIC like this. Getting switch ids is one thing
>>> that will become useful when we enable multiple bridges.
>>
>> This patchset is not really adding any switchdev ops.  We are calling the
>> mode in which the
>> PF driver creates VF representors as SWITCHDEV mode (this is based on the
>> netdev
>> discussion of mellanox patches).
>>
>> Amritha has a patch to add switchdev ops to VF representor netdevs that
>> enables returning
>> switch id via switchdev_port_attr_get() op.
>>
>>> Otherwise I don't see what l2 switchdev blocks are useful vs
>>> just using the standard ndo op interfaces already in place when
>>> working on a device without learning/aging/etc. The one thing
>>> that I've never bothered to add is pushing "learned" rules down
>>> into the hardware but I'm not convinced for most use cases this
>>> is particularly interesting because you _should_ know in a managed
>>> system what MAC addresses a VM/container/etc is allowed to use
>>> ahead of time via libvirt or other mgmt stack. I haven't tested
>>> the VLAN handling though so that needs to be looked at.
>>>
>>> And l3 switchdev routing may be interesting but its fairly
>>> low on my priority list unless someone is really excited about it.
>>>
>>>>> This is less about switchdev and more about generating VF netdevs to
>>>>> use with ip tools and friends.
>>>> Right.  One of the issues I have with this patch set is that it seems
>>>> to get things backwards.  They are making VFs appear that don't do
>>>> much of anything and then trying to bolt on features after the fact.
>>>> We probably need to focus on enabling the VF representation, and then
>>>> providing the ability to switch them on and off.  Also I would argue
>>>> that we should actually be enabling switch features such as FDB
>>>> entries instead of trying to bolt on stuff like flow director which
>>>> doesn't really apply to very many switches and isn't as likely to be
>>>> used on a switch port.
>>> Fair enough. Organizing the patches better seems OK to me. I plan to
>>> use the 'tc' offloaded mechanisms not the ethtool flow director
>>> interface for virtual switch offloads.
>>>
>>>>> Another option would be to just always enable VF netdevs and have no
>>>>> legacy mode at all. I think that would be fine it just depends on if
>>>>> you think having extra netdevs around will confuse the stack at all.
>>>>> It might create a few corner cases but one reasonable thing to do
>>>>> would be to just fix those cases as they appear.
>>>> I'd say we are better off starting out with them just enabled and then
>>>> enabling the option to disable them after the fact.  If we are going
>>>> to have this extra code floating around we should be defaulting it to
>>>> enabled so that it is more likely to be used.  The legacy option
>>>> should only really be there so we can turn this off if we don't want
>>>> it.
>>>>
>>> Works for me.
>>>
>> OK.  If we all agree that 'creating VF netdevs' by default is the right way
>> to go,  I will rearrange the
>> patchset in the following order.
>>     - Introduce VF representor netdevs (create VF netdevs by default when
>> sr-iov VFs are enabled)
>>     - enable ethtool stats on VF representors
>>     - enable ntuple filters on VF representors
> No NTUPLE filters.  This is not the way the community will want to do
> this, and this is not how I want to do this.
>
> The easy way to think of this is that NTUPLE is just an alias for Rx
> network flow classification.  Now where are the VF representors
> supposed to receive traffic from?  It is the Tx path of the VF.  So by
> putting NTUPLE filters on the VF representors you are implying  you
> can get in the Tx path for the VF for all traffic, and last I knew
> that was not the case.  As such it makes no sense to do it this way.

The idea behind NTUPLE filters on VF representors is to match RX traffic that is destined
for the corresponding VF. Basically when adding the flow director filter, we are setting the
dest vsi as the VSI of the corresponding VF.
Without VF representors, this is currently done by overloading user-def value with VF id.


>
>>     - introduce devlink to enable the switch mode to be changed to 'legacy'
>> (no VF netdevs)
>>
>> Thanks
>> Sridhar
> You have to think of the VF representors as switch ports.  They don't
> actually represent the VFs.  So if you think about it a switch port
> will only have a few real features.  You can control the VLAN
> membership, tagged/untagged, what FDB entries are assigned to the
> port, the link speed, if the link is up/down, etc.  So some of your
> first steps might be to focus on the L1 and L2 type items.  So for
> example you might see if we can do things like control the upper limit
> of the MTU for the VF by controlling the MTU for the VF representor.
> See if you can at least make it so that the MAC and VLAN values for
> the VF show up as FDB entries for the VF representor and so forth.

Sure.  These are additional items that we can look into that can be 
enabled via VF representors.
>
> - Alex


^ permalink raw reply	[flat|nested] 18+ messages in thread

* [Intel-wired-lan] [next PATCH 1/5] i40e: Introduce devlink interface.
  2016-08-16 21:51             ` Samudrala, Sridhar
@ 2016-08-17  0:17               ` Alexander Duyck
  2016-08-17 19:24                 ` John Fastabend
  0 siblings, 1 reply; 18+ messages in thread
From: Alexander Duyck @ 2016-08-17  0:17 UTC (permalink / raw)
  To: intel-wired-lan

On Tue, Aug 16, 2016 at 2:51 PM, Samudrala, Sridhar
<sridhar.samudrala@intel.com> wrote:
> The idea behind NTUPLE filters on VF representors is to match RX traffic
> that is destined
> for the corresponding VF. Basically when adding the flow director filter, we
> are setting the
> dest vsi as the VSI of the corresponding VF.
> Without VF representors, this is currently done by overloading user-def
> value with VF id.
>

Yes, and for now that is preferable to what you are doing since it
makes no sense.

I will try to clarify my point.  First, instead of calling these VF
representors lets call them VF port representor, or VFPR for short
(trying to avoid VFR because it sounds too much like VRF which is
completely unrelated).  So the whole point of a VFPR is to present the
VF as a port on your logical switch.  You should mostly only be
capable of doing on it what you would do with an actual switch.  So
for example if I have a switch connected to NIC.  Now most switches
have certain options for controlling things.  For example I can force
the switch port onto a given VLAN so that the port is isolated from
other traffic.  I can control the max frame size allowed to be sent or
received over the port.  I can block what traffic the port is allowed
to send by restricting MAC addresses via ACLs, and I can manually
program the forwarding table via FDB entries.

The problem is Flow Director also know as NTUPLE or Rx NFC only
operates on the Rx side of NICs.  In your VFPR the Rx side represents
packets coming from your VF, not packets going to it.   If you
transmit on a VFPR the packet should be received only by the VF, if
you receive from it the packet should only be from the VF.  So really
any Flow Director rule you add to the VFPR should not impact the VF,
but instead should impact the VFPR and the PF since the PF will add
the rule and it should only apply on traffic received on the VFPR from
the VF.

At some point in the future we can look at adding flow Tx offloads via
either TC or SwitchDev.  For now adding NTUPLE is a strong no-go.  The
VFPR are not the place to do it unless you are just wanting to add
custom rules to the PF itself that won't impact the VF.

- Alex

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [Intel-wired-lan] [next PATCH 1/5] i40e: Introduce devlink interface.
  2016-08-17  0:17               ` Alexander Duyck
@ 2016-08-17 19:24                 ` John Fastabend
  0 siblings, 0 replies; 18+ messages in thread
From: John Fastabend @ 2016-08-17 19:24 UTC (permalink / raw)
  To: intel-wired-lan

On 16-08-16 05:17 PM, Alexander Duyck wrote:
> On Tue, Aug 16, 2016 at 2:51 PM, Samudrala, Sridhar
> <sridhar.samudrala@intel.com> wrote:
>> The idea behind NTUPLE filters on VF representors is to match RX traffic
>> that is destined
>> for the corresponding VF. Basically when adding the flow director filter, we
>> are setting the
>> dest vsi as the VSI of the corresponding VF.
>> Without VF representors, this is currently done by overloading user-def
>> value with VF id.
>>
> 
> Yes, and for now that is preferable to what you are doing since it
> makes no sense.
> 
> I will try to clarify my point.  First, instead of calling these VF
> representors lets call them VF port representor, or VFPR for short
> (trying to avoid VFR because it sounds too much like VRF which is
> completely unrelated).  So the whole point of a VFPR is to present the
> VF as a port on your logical switch.  You should mostly only be
> capable of doing on it what you would do with an actual switch.  So
> for example if I have a switch connected to NIC.  Now most switches
> have certain options for controlling things.  For example I can force
> the switch port onto a given VLAN so that the port is isolated from
> other traffic.  I can control the max frame size allowed to be sent or
> received over the port.  I can block what traffic the port is allowed
> to send by restricting MAC addresses via ACLs, and I can manually
> program the forwarding table via FDB entries.
> 
> The problem is Flow Director also know as NTUPLE or Rx NFC only
> operates on the Rx side of NICs.  In your VFPR the Rx side represents
> packets coming from your VF, not packets going to it.   If you
> transmit on a VFPR the packet should be received only by the VF, if
> you receive from it the packet should only be from the VF.  So really
> any Flow Director rule you add to the VFPR should not impact the VF,
> but instead should impact the VFPR and the PF since the PF will add
> the rule and it should only apply on traffic received on the VFPR from
> the VF.
> 
> At some point in the future we can look at adding flow Tx offloads via
> either TC or SwitchDev.  For now adding NTUPLE is a strong no-go.  The
> VFPR are not the place to do it unless you are just wanting to add
> custom rules to the PF itself that won't impact the VF.
> 
> - Alex
> 

FWIW the features I want from the VFPR (nice acronym :) are the
following,

  - xmit (just send it over the PF queues is fine)
  - recv : ideally the driver does this with descriptor magic but
           the next best thing would be to do this in the driver
	   with a lookup on mac when anti-spoofing is on. Or worst
           case if the driver logic is too ugly I'll hack it in
           userspace :(
  - up/down link state
  - statistics
  - not essential but mtu would also be nice

I think thats it to get my basic stuff working in userspace. Another
nice piece would be the fdb_add offloads so we can get a basic l2 bridge
working and bonus points for including the vlan parts of the fdb_add
msg.

Thanks,
John

^ permalink raw reply	[flat|nested] 18+ messages in thread

end of thread, other threads:[~2016-08-17 19:24 UTC | newest]

Thread overview: 18+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-08-04 16:49 [Intel-wired-lan] [next PATCH 1/5] i40e: Introduce devlink interface Sridhar Samudrala
2016-08-04 16:49 ` [Intel-wired-lan] [next PATCH 2/5] i40e: Introduce VF representor/control netdevs Sridhar Samudrala
2016-08-10  0:18   ` Nambiar, Amritha
2016-08-04 16:49 ` [Intel-wired-lan] [next PATCH 3/5] i40e: Enable VF specific ethtool statistics via VF representor netdevs Sridhar Samudrala
2016-08-10  0:21   ` Nambiar, Amritha
2016-08-04 16:49 ` [Intel-wired-lan] [next PATCH 4/5] i40e: Refactor flow director filter management to make it per VSI Sridhar Samudrala
2016-08-04 16:49 ` [Intel-wired-lan] [next PATCH 5/5] i40e: Enable ntuple filters on VFs via VF representor netdevs Sridhar Samudrala
2016-08-10  0:12 ` [Intel-wired-lan] [next PATCH 1/5] i40e: Introduce devlink interface Nambiar, Amritha
2016-08-10 16:01 ` Alexander Duyck
2016-08-10 16:50   ` John Fastabend
2016-08-10 18:18     ` Alexander Duyck
2016-08-10 18:41       ` John Fastabend
2016-08-16 18:54         ` Samudrala, Sridhar
2016-08-16 20:39           ` Alexander Duyck
2016-08-16 21:51             ` Samudrala, Sridhar
2016-08-17  0:17               ` Alexander Duyck
2016-08-17 19:24                 ` John Fastabend
2016-08-10 20:50 ` Jeff Kirsher

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.