All of lore.kernel.org
 help / color / mirror / Atom feed
From: Simon Horman <horms@verge.net.au>
To: e1000-devel@lists.sourceforge.net, netdev@vger.kernel.org
Cc: Arnd Bergmann <arndbergmann@googlemail.com>,
	Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Subject: [rfc 4/4] igb: expose 82576 bandiwidth allocation
Date: Thu, 05 Nov 2009 11:58:51 +1100	[thread overview]
Message-ID: <20091105010628.148945886@vergenet.net> (raw)
In-Reply-To: 20091105005847.941190065@vergenet.net

[-- Attachment #1: igb-ba.patch --]
[-- Type: text/plain, Size: 12335 bytes --]

The 82576 has support for bandwidth allocation to VFs.

Contrary to the documentation in the 82576 datasheet v2.41 this
appears to work as follows:

* The ratio supplied is always proportional to 1Gbit/s,
  regardless of the link speed.
* The ratio supplied is an upper bound on the bandwidth available
  to the VF, not a minimum guarantee.

This patch exposes bandwidth control to userspace through a simple
per-device (PF) sysfs file, bandwidth_allocation.

* The file contains a whitespace delimited list of values, one per VF.
* The first value corresponds to the first VF and so on.
* Valid values are integers from 0 to 1000
* A value of 0 indicates that bandwidth_allocation is disabled.
* Other values indicate the allocated bandwidth, in 1/1000ths of a gigabit/s

e.g. The following for a PF with 4 VFs allocates ~20Mbit/s to VF 1,
     ~100Mbit/s to VF 2, and leaves the other 2 VFs with no allocation.

     echo "20 100 0 0" > /sys/class/net/eth3/device/bandwidth_allocation

This interface is intended to allow testing of the hardware feature.
There are ongoing discussions about how to expose this feature
to user-space in a more generic way.

Signed-off-by: Simon Horman <horms@verge.net.au>

Index: net-next-2.6/drivers/net/igb/igb_main.c
===================================================================
--- net-next-2.6.orig/drivers/net/igb/igb_main.c	2009-11-05 04:55:06.000000000 +0900
+++ net-next-2.6/drivers/net/igb/igb_main.c	2009-11-05 05:12:54.000000000 +0900
@@ -47,6 +47,9 @@
 #ifdef CONFIG_IGB_DCA
 #include <linux/dca.h>
 #endif
+#ifdef CONFIG_PCI_IOV
+#include <linux/ctype.h>
+#endif
 #include "igb.h"
 
 #define DRV_VERSION "1.3.16-k2"
@@ -152,6 +155,15 @@ static unsigned int max_vfs = 0;
 module_param(max_vfs, uint, 0);
 MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate "
                  "per physical function");
+
+/* sysfs attribute exposing per-VF Tx bandwidth allocation on the PF device */
+static ssize_t igb_set_bandwidth_allocation(struct device *,
+					    struct device_attribute *,
+					    const char *, size_t);
+static ssize_t igb_show_bandwidth_allocation(struct device *,
+					     struct device_attribute *, char *);
+static DEVICE_ATTR(bandwidth_allocation, S_IRUGO | S_IWUSR,
+		   igb_show_bandwidth_allocation, igb_set_bandwidth_allocation);
 #endif /* CONFIG_PCI_IOV */
 
 static pci_ers_result_t igb_io_error_detected(struct pci_dev *,
@@ -1754,7 +1766,18 @@ static void __devinit igb_init_vf(struct
 	}
 
 	if (pci_enable_sriov(pdev, vfn))
-		goto err;
+		goto err_kfree;
+
+	if (device_create_file(&pdev->dev, &dev_attr_bandwidth_allocation))
+		goto err_sriov;
+
+	adapter->bandwidth_allocation = kcalloc(vfn, sizeof(unsigned int),
+						GFP_KERNEL);
+	if (!adapter->bandwidth_allocation)
+		goto err_file;
+	memset(adapter->bandwidth_allocation, vfn * sizeof(unsigned int), 0);
+
+	spin_lock_init(&adapter->bandwidth_allocation_lock);
 
 	dev_info(&pdev->dev, "%d vfs allocated\n", vfn);
 	for (i = 0; i < vfn; i++) {
@@ -1765,7 +1788,11 @@ static void __devinit igb_init_vf(struct
 	adapter->vfs_allocated_count = vfn;
 
 	return;
-err:
+err_file:
+	device_remove_file(&pdev->dev, &dev_attr_bandwidth_allocation);
+err_sriov:
+	pci_disable_sriov(pdev);
+err_kfree:
 	kfree(adapter->vf_data);
 	adapter->vf_data = NULL;
 #endif /* CONFIG_PCI_IOV */
@@ -1781,6 +1808,7 @@ err:
 static void igb_cleanup_vf(struct igb_adapter * adapter)
 {
 #ifdef CONFIG_PCI_IOV
+	struct pci_dev *pdev = adapter->pdev;
 	struct e1000_hw *hw = &adapter->hw;
 
 	if (!adapter->vf_data)
@@ -1797,6 +1825,9 @@ static void igb_cleanup_vf(struct igb_ad
 	wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
 	msleep(100);
 	dev_info(&adapter->pdev->dev, "IOV Disabled\n");
+
+	device_remove_file(&pdev->dev, &dev_attr_bandwidth_allocation);
+	kfree(adapter->bandwidth_allocation);
 #endif
 }
 
@@ -2088,6 +2119,123 @@ void igb_configure_tx_ring(struct igb_ad
 	wr32(E1000_TXDCTL(reg_idx), txdctl);
 }
 
+#ifdef CONFIG_PCI_IOV
+static void igb_disable_bandwidth_allocation_vf(struct e1000_hw *hw, int vf)
+{
+	wr32(E1000_VMBASEL, vf);	/* VF index; presumably targets VMBAC */
+	wr32(E1000_VMBAC, 0);		/* zero clears RC_ENA and rate factor */
+}
+/* Disable Tx bandwidth allocation for each of the allocated VFs. */
+static void igb_disable_bandwidth_allocation(struct igb_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	int i;
+
+	for (i = 0; i < adapter->vfs_allocated_count; i++)
+		igb_disable_bandwidth_allocation_vf(hw, i);
+}
+/* Enable Tx rate limiting for VF @vf; @allocation must be non-zero. */
+static void igb_enable_bandwidth_allocation_vf(struct e1000_hw *hw, int vf,
+					       unsigned int allocation)
+{
+	u32 rq;
+
+	/* Allocation is expressed as 1000ths of link speed [+]
+	 *
+	 * rq is calculated as 1 / (allocation / 1000) = 1000 / allocation
+	 *
+	 * E1000_VMBAC_RF_INT_SHIFT and E1000_VMBAC_RF_MASK are used
+	 * to marshal the result into the desired format: 23 bits of
+	 * which 14 are to the right of the binary point.
+	 *
+	 * [+] According to the 82576 v2.41 datasheet rq should
+	 *     be a ratio of the link speed, however, empirically
+	 *     it appears to always be a ratio of 1Gbit/s,
+	 *     even when the link is 100Mbit/s.
+	 */
+	rq = ((1000 << E1000_VMBAC_RF_INT_SHIFT) / allocation) &
+	     E1000_VMBAC_RF_MASK;
+
+	wr32(E1000_VMBASEL, vf);
+	wr32(E1000_VMBAC, rq|E1000_VMBAC_RC_ENA);
+}
+/* Push the stored per-VF allocations to hardware, or disable them all. */
+static void igb_enable_bandwidth_allocation(struct igb_adapter *adapter)
+{
+	u32 i, reg;
+	struct e1000_hw *hw = &adapter->hw;
+
+	/* Only enable bandwidth allocation if the per-VF array exists
+	 * and the link speed is 100Mbit/s or 1Gbit/s */
+	if (!adapter->bandwidth_allocation ||
+	    (adapter->link_speed != SPEED_100 &&
+	     adapter->link_speed != SPEED_1000)) {
+		igb_disable_bandwidth_allocation(adapter);
+		return;
+	}
+
+	for (i = 0; i < adapter->vfs_allocated_count; i++) {
+		wr32(E1000_VMBASEL, i);	/* NOTE(review): helpers set it too */
+		if (adapter->bandwidth_allocation[i])
+			igb_enable_bandwidth_allocation_vf(hw, i,
+					adapter->bandwidth_allocation[i]);
+		else
+			igb_disable_bandwidth_allocation_vf(hw, i);
+
+		/* XXX:
+		 *
+		 * The 82576 datasheet, section 4.5.11.1.5.1 "Configuring Tx
+		 * Bandwidth to VMs" states that the desired setting is:
+		 * VMBAMMW.MMW_SIZE = 16 * MSS
+		 *
+		 * But isn't MSS a property of skbs that are using TSO
+		 * rather than of adapters?
+		 *
+		 * If so, should we use the maximum value here? */
+		/* XXX: Should this go inside or outside the for loop ? */
+		reg = 64 * 16;		/* 16 * 64: 64 presumably stands for MSS */
+		wr32(E1000_VMBAMMW, reg);
+	}
+}
+#endif
+/* Re-apply VF bandwidth allocations; called from igb_watchdog_task(). */
+static void igb_check_bandwidth_allocation(struct igb_adapter *adapter)
+{
+#ifdef CONFIG_PCI_IOV
+	u32 vmbacs;
+	struct e1000_hw *hw = &adapter->hw;
+
+	if (!adapter->vf_data)
+		return;
+
+	/* The 82576 datasheet, section 4.5.11.1.5.2 "Link Speed Change
+	 * Procedure" describes the sequence below. However, the
+	 * SPEED_CHG bit never seems to be set.
+	 */
+	vmbacs = rd32(E1000_VMBACS);
+	if (vmbacs & E1000_VMBACS_SPEED_CHG) {
+		/* XXX: Never seem to get here */
+		int err = 0;
+
+		if (vmbacs & E1000_VMBACS_VMBA_SET) {
+			igb_disable_bandwidth_allocation(adapter);
+			err = 1;	/* skip the re-enable below */
+		}
+
+		vmbacs &= ~E1000_VMBACS_SPEED_CHG;
+		wr32(E1000_VMBACS, vmbacs);
+
+		if (err)
+			return;
+	}
+
+	spin_lock(&adapter->bandwidth_allocation_lock);
+	igb_enable_bandwidth_allocation(adapter);
+	spin_unlock(&adapter->bandwidth_allocation_lock);
+#endif
+	return;
+}
+
 /**
  * igb_configure_tx - Configure transmit Unit after Reset
  * @adapter: board private structure
@@ -2969,6 +3117,8 @@ static void igb_watchdog_task(struct wor
 				break;
 			}
 
+			igb_check_bandwidth_allocation(adapter);
+
 			netif_carrier_on(netdev);
 
 			igb_ping_all_vfs(adapter);
@@ -5854,4 +6004,101 @@ static void igb_vmm_control(struct igb_a
 	}
 }
 
+#ifdef CONFIG_PCI_IOV
+/* sysfs show: one allocation per VF, space separated, newline terminated.
+ * Returns the number of bytes written, or -ENOENT if there is no VF data. */
+static ssize_t igb_show_bandwidth_allocation(struct device *dev,
+					     struct device_attribute *attr,
+					     char *buf)
+{
+	struct net_device *netdev = dev_get_drvdata(dev);
+	struct igb_adapter *adapter = netdev_priv(netdev);
+	ssize_t len = 0;
+	int i;
+
+	if (!adapter->vf_data)
+		return -ENOENT;
+
+	/* scnprintf() keeps the output inside the PAGE_SIZE sysfs buffer */
+	for (i = 0; i < adapter->vfs_allocated_count; i++)
+		len += scnprintf(buf + len, PAGE_SIZE - len, i ? " %u" : "%u",
+				 adapter->bandwidth_allocation[i]);
+	len += scnprintf(buf + len, PAGE_SIZE - len, "\n");
+
+	return len;
+}
+/* simple_strtoul() after leading whitespace; *endp = @cp if nothing parsed. */
+static unsigned long igb_strtoul(const char *cp, char **endp, unsigned int base)
+{
+	const char *orig = cp;
+	unsigned long x;
+
+	while (isspace(*cp))
+		cp++;
+
+	x = simple_strtoul(cp, endp, base);
+	if (cp == *endp)
+		*endp = (char *)orig;	/* nothing consumed: rewind to start */
+
+	return x;
+}
+/* sysfs store: parse one allocation (0..1000) per VF from @buf and apply them.
+ * Returns @count, or -ENOENT (no VF data), -ENOMEM, -EINVAL (bad @buf). */
+static ssize_t igb_set_bandwidth_allocation(struct device *dev,
+					    struct device_attribute *attr,
+					    const char *buf, size_t count)
+{
+	struct net_device *netdev = dev_get_drvdata(dev);
+	struct igb_adapter *adapter = netdev_priv(netdev);
+	int i;
+	size_t len;
+	ssize_t status = -EINVAL;
+	unsigned int *new;
+	unsigned long x;
+	const char *p = buf;
+	char *next_p;
+
+	if (!adapter->vf_data)
+		return -ENOENT;
+
+	len = adapter->vfs_allocated_count * sizeof(unsigned int);
+
+	/* Parse into a scratch array so that bad input changes nothing */
+	new = kmalloc(len, GFP_KERNEL);
+	if (!new)
+		return -ENOMEM;
+
+	for (i = 0; i < adapter->vfs_allocated_count; i++) {
+		x = igb_strtoul(p, &next_p, 10);
+		if (p == next_p) {
+			dev_err(dev, "not enough values\n");
+			goto err;
+		}
+		if (x > 1000) {
+			dev_err(dev, "value is too large\n");
+			goto err;
+		}
+		new[i] = x;
+		p = next_p;
+	}
+
+	/* Only whitespace may follow the last value */
+	while (isspace(*p))
+		p++;
+	if (*p) {
+		dev_err(dev, "trailing rubbish\n");
+		goto err;
+	}
+
+	spin_lock(&adapter->bandwidth_allocation_lock);
+	memcpy(adapter->bandwidth_allocation, new, len);
+	igb_enable_bandwidth_allocation(adapter);
+	spin_unlock(&adapter->bandwidth_allocation_lock);
+
+	status = count;
+err:
+	kfree(new);
+	return status;
+}
+#endif /* CONFIG_PCI_IOV */
 /* igb_main.c */
Index: net-next-2.6/drivers/net/igb/e1000_regs.h
===================================================================
--- net-next-2.6.orig/drivers/net/igb/e1000_regs.h	2009-11-05 03:07:08.000000000 +0900
+++ net-next-2.6/drivers/net/igb/e1000_regs.h	2009-11-05 05:01:35.000000000 +0900
@@ -308,6 +308,16 @@
 #define E1000_VLVF(_n)         (0x05D00 + (4 * (_n))) /* VLAN Virtual Machine
                                                        * Filter - RW */
 
+/* Tx Bandwidth Allocation to VM Registers */
+#define E1000_VMBACS	0x03600 /* VM Bandwidth Allocation
+				 * Control & Status - RW */
+#define E1000_VMBAMMW	0x03670 /* VM Bandwidth Allocation
+				 * Max Memory Window - RW */
+#define E1000_VMBASEL	0x03604 /* VM Bandwidth Allocation
+				 * Select - RW */
+#define E1000_VMBAC	0x03608 /* VM Bandwidth Allocation
+				 * Config - RW */
+
 #define wr32(reg, value) (writel(value, hw->hw_addr + reg))
 #define rd32(reg) (readl(hw->hw_addr + reg))
 #define wrfl() ((void)rd32(E1000_STATUS))
Index: net-next-2.6/drivers/net/igb/e1000_defines.h
===================================================================
--- net-next-2.6.orig/drivers/net/igb/e1000_defines.h	2009-11-05 03:07:08.000000000 +0900
+++ net-next-2.6/drivers/net/igb/e1000_defines.h	2009-11-05 05:01:35.000000000 +0900
@@ -711,4 +711,13 @@
 #define E1000_VFTA_ENTRY_MASK                0x7F
 #define E1000_VFTA_ENTRY_BIT_SHIFT_MASK      0x1F
 
+/* VM Bandwidth Allocation Control & Status */
+#define E1000_VMBACS_VMBA_SET		0x00001000
+#define E1000_VMBACS_SPEED_CHG		0x80000000
+
+/* VM Bandwidth Allocation Config */
+#define E1000_VMBAC_RF_INT_SHIFT	14
+#define E1000_VMBAC_RF_MASK		((1<<23)-1)	/* RF_DEC and RF_INT */
+#define E1000_VMBAC_RC_ENA		0x80000000
+
 #endif
Index: net-next-2.6/drivers/net/igb/igb.h
===================================================================
--- net-next-2.6.orig/drivers/net/igb/igb.h	2009-11-05 03:07:08.000000000 +0900
+++ net-next-2.6/drivers/net/igb/igb.h	2009-11-05 05:01:35.000000000 +0900
@@ -315,6 +315,10 @@ struct igb_adapter {
 	u16 rx_ring_count;
 	unsigned int vfs_allocated_count;
 	struct vf_data_storage *vf_data;
+#ifdef CONFIG_PCI_IOV
+	unsigned int *bandwidth_allocation;
+	spinlock_t bandwidth_allocation_lock;
+#endif
 };
 
 #define IGB_FLAG_HAS_MSI           (1 << 0)


------------------------------------------------------------------------------
Let Crystal Reports handle the reporting - Free Crystal Reports 2008 30-Day 
trial. Simplify your report design, integration and deployment - and focus on 
what you do best, core application coding. Discover what's new with
Crystal Reports now.  http://p.sf.net/sfu/bobj-july

  parent reply	other threads:[~2009-11-05  0:58 UTC|newest]

Thread overview: 14+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2009-11-05  0:58 [rfc 0/4] igb: bandwidth allocation Simon Horman
2009-11-05  0:58 ` [rfc 1/4] igb: Add igb_cleanup_vf() Simon Horman
2009-11-05  0:58 ` [rfc 2/4] igb: Initialise adapter->vfs_allocated_count in igb_init_vf() Simon Horman
2009-11-05  0:58 ` [rfc 3/4] igb: Common error path in igb_init_vfs() Simon Horman
2009-11-05  0:58 ` Simon Horman [this message]
2009-11-05 23:00   ` [rfc 4/4] igb: expose 82576 bandiwidth allocation Alexander Duyck
2009-11-05 23:30     ` Simon Horman
2009-11-05 23:42       ` Alexander Duyck
2009-11-06  3:57         ` Simon Horman
2009-11-05  1:46 ` [rfc 0/4] igb: bandwidth allocation Jeff Kirsher
2009-11-05  2:21   ` Simon Horman
2009-11-14  8:01     ` Jeff Kirsher
2009-11-25  6:31       ` Simon Horman
2009-11-05 12:09 ` Andi Kleen

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20091105010628.148945886@vergenet.net \
    --to=horms@verge.net.au \
    --cc=arndbergmann@googlemail.com \
    --cc=e1000-devel@lists.sourceforge.net \
    --cc=jeffrey.t.kirsher@intel.com \
    --cc=netdev@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.