netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
To: davem@davemloft.net
Cc: Alexander Duyck <alexander.h.duyck@intel.com>,
	netdev@vger.kernel.org, gospo@redhat.com,
	Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Subject: [net-next 11/13] igb: Make Tx budget for NAPI user adjustable
Date: Sat, 17 Sep 2011 01:04:35 -0700	[thread overview]
Message-ID: <1316246677-8830-12-git-send-email-jeffrey.t.kirsher@intel.com> (raw)
In-Reply-To: <1316246677-8830-1-git-send-email-jeffrey.t.kirsher@intel.com>

From: Alexander Duyck <alexander.h.duyck@intel.com>

This change is meant to make the NAPI budget limits for transmit
adjustable.  By doing this it is possible to tune the value for optimal
performance with applications such as routing.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Tested-by:  Aaron Brown  <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igb/igb.h         |    3 +
 drivers/net/ethernet/intel/igb/igb_ethtool.c |    6 +
 drivers/net/ethernet/intel/igb/igb_main.c    |  136 ++++++++++++++++----------
 3 files changed, 92 insertions(+), 53 deletions(-)

diff --git a/drivers/net/ethernet/intel/igb/igb.h b/drivers/net/ethernet/intel/igb/igb.h
index b725937..cfa590c8 100644
--- a/drivers/net/ethernet/intel/igb/igb.h
+++ b/drivers/net/ethernet/intel/igb/igb.h
@@ -47,6 +47,7 @@ struct igb_adapter;
 
 /* TX/RX descriptor defines */
 #define IGB_DEFAULT_TXD                  256
+#define IXGBE_DEFAULT_TX_WORK		 128
 #define IGB_MIN_TXD                       80
 #define IGB_MAX_TXD                     4096
 
@@ -177,6 +178,7 @@ struct igb_q_vector {
 
 	u32 eims_value;
 	u16 cpu;
+	u16 tx_work_limit;
 
 	u16 itr_val;
 	u8 set_itr;
@@ -266,6 +268,7 @@ struct igb_adapter {
 	u16 rx_itr;
 
 	/* TX */
+	u16 tx_work_limit;
 	u32 tx_timeout_count;
 	int num_tx_queues;
 	struct igb_ring *tx_ring[16];
diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c
index f231d82..f6da820 100644
--- a/drivers/net/ethernet/intel/igb/igb_ethtool.c
+++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c
@@ -1989,6 +1989,9 @@ static int igb_set_coalesce(struct net_device *netdev,
 	if ((adapter->flags & IGB_FLAG_QUEUE_PAIRS) && ec->tx_coalesce_usecs)
 		return -EINVAL;
 
+	if (ec->tx_max_coalesced_frames_irq)
+		adapter->tx_work_limit = ec->tx_max_coalesced_frames_irq;
+
 	/* If ITR is disabled, disable DMAC */
 	if (ec->rx_coalesce_usecs == 0) {
 		if (adapter->flags & IGB_FLAG_DMAC)
@@ -2011,6 +2014,7 @@ static int igb_set_coalesce(struct net_device *netdev,
 
 	for (i = 0; i < adapter->num_q_vectors; i++) {
 		struct igb_q_vector *q_vector = adapter->q_vector[i];
+		q_vector->tx_work_limit = adapter->tx_work_limit;
 		if (q_vector->rx_ring)
 			q_vector->itr_val = adapter->rx_itr_setting;
 		else
@@ -2033,6 +2037,8 @@ static int igb_get_coalesce(struct net_device *netdev,
 	else
 		ec->rx_coalesce_usecs = adapter->rx_itr_setting >> 2;
 
+	ec->tx_max_coalesced_frames_irq = adapter->tx_work_limit;
+
 	if (!(adapter->flags & IGB_FLAG_QUEUE_PAIRS)) {
 		if (adapter->tx_itr_setting <= 3)
 			ec->tx_coalesce_usecs = adapter->tx_itr_setting;
diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index 7ad25e8..989e774 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -136,8 +136,8 @@ static irqreturn_t igb_msix_ring(int irq, void *);
 static void igb_update_dca(struct igb_q_vector *);
 static void igb_setup_dca(struct igb_adapter *);
 #endif /* CONFIG_IGB_DCA */
-static bool igb_clean_tx_irq(struct igb_q_vector *);
 static int igb_poll(struct napi_struct *, int);
+static bool igb_clean_tx_irq(struct igb_q_vector *);
 static bool igb_clean_rx_irq(struct igb_q_vector *, int);
 static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
 static void igb_tx_timeout(struct net_device *);
@@ -1120,6 +1120,7 @@ static void igb_map_tx_ring_to_vector(struct igb_adapter *adapter,
 	q_vector->tx_ring = adapter->tx_ring[ring_idx];
 	q_vector->tx_ring->q_vector = q_vector;
 	q_vector->itr_val = adapter->tx_itr_setting;
+	q_vector->tx_work_limit = adapter->tx_work_limit;
 	if (q_vector->itr_val && q_vector->itr_val <= 3)
 		q_vector->itr_val = IGB_START_ITR;
 }
@@ -2388,11 +2389,17 @@ static int __devinit igb_sw_init(struct igb_adapter *adapter)
 
 	pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);
 
+	/* set default ring sizes */
 	adapter->tx_ring_count = IGB_DEFAULT_TXD;
 	adapter->rx_ring_count = IGB_DEFAULT_RXD;
+
+	/* set default ITR values */
 	adapter->rx_itr_setting = IGB_DEFAULT_ITR;
 	adapter->tx_itr_setting = IGB_DEFAULT_ITR;
 
+	/* set default work limits */
+	adapter->tx_work_limit = IXGBE_DEFAULT_TX_WORK;
+
 	adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN +
 				  VLAN_HLEN;
 	adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
@@ -5496,7 +5503,7 @@ static int igb_poll(struct napi_struct *napi, int budget)
 		igb_update_dca(q_vector);
 #endif
 	if (q_vector->tx_ring)
-		clean_complete = !!igb_clean_tx_irq(q_vector);
+		clean_complete = igb_clean_tx_irq(q_vector);
 
 	if (q_vector->rx_ring)
 		clean_complete &= igb_clean_rx_irq(q_vector, budget);
@@ -5578,64 +5585,69 @@ static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
 {
 	struct igb_adapter *adapter = q_vector->adapter;
 	struct igb_ring *tx_ring = q_vector->tx_ring;
-	struct net_device *netdev = tx_ring->netdev;
-	struct e1000_hw *hw = &adapter->hw;
-	struct igb_buffer *buffer_info;
-	union e1000_adv_tx_desc *tx_desc, *eop_desc;
+	struct igb_buffer *tx_buffer;
+	union e1000_adv_tx_desc *tx_desc;
 	unsigned int total_bytes = 0, total_packets = 0;
-	unsigned int i, eop, count = 0;
-	bool cleaned = false;
+	unsigned int budget = q_vector->tx_work_limit;
+	u16 i = tx_ring->next_to_clean;
 
-	i = tx_ring->next_to_clean;
-	eop = tx_ring->buffer_info[i].next_to_watch;
-	eop_desc = IGB_TX_DESC(tx_ring, eop);
+	if (test_bit(__IGB_DOWN, &adapter->state))
+		return true;
 
-	while ((eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)) &&
-	       (count < tx_ring->count)) {
-		rmb();	/* read buffer_info after eop_desc status */
-		for (cleaned = false; !cleaned; count++) {
-			tx_desc = IGB_TX_DESC(tx_ring, i);
-			buffer_info = &tx_ring->buffer_info[i];
-			cleaned = (i == eop);
+	tx_buffer = &tx_ring->buffer_info[i];
+	tx_desc = IGB_TX_DESC(tx_ring, i);
 
-			if (buffer_info->skb) {
-				total_bytes += buffer_info->bytecount;
-				/* gso_segs is currently only valid for tcp */
-				total_packets += buffer_info->gso_segs;
-				igb_tx_hwtstamp(q_vector, buffer_info);
-			}
+	for (; budget; budget--) {
+		u16 eop = tx_buffer->next_to_watch;
+		union e1000_adv_tx_desc *eop_desc;
+
+		eop_desc = IGB_TX_DESC(tx_ring, eop);
+
+		/* if DD is not set pending work has not been completed */
+		if (!(eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)))
+			break;
+
+		/* prevent any other reads prior to eop_desc being verified */
+		rmb();
 
-			igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
+		do {
 			tx_desc->wb.status = 0;
+			if (likely(tx_desc == eop_desc)) {
+				eop_desc = NULL;
+
+				total_bytes += tx_buffer->bytecount;
+				total_packets += tx_buffer->gso_segs;
+				igb_tx_hwtstamp(q_vector, tx_buffer);
+			}
+
+			igb_unmap_and_free_tx_resource(tx_ring, tx_buffer);
 
+			tx_buffer++;
+			tx_desc++;
 			i++;
-			if (i == tx_ring->count)
+			if (unlikely(i == tx_ring->count)) {
 				i = 0;
-		}
-		eop = tx_ring->buffer_info[i].next_to_watch;
-		eop_desc = IGB_TX_DESC(tx_ring, eop);
+				tx_buffer = tx_ring->buffer_info;
+				tx_desc = IGB_TX_DESC(tx_ring, 0);
+			}
+		} while (eop_desc);
 	}
 
 	tx_ring->next_to_clean = i;
+	u64_stats_update_begin(&tx_ring->tx_syncp);
+	tx_ring->tx_stats.bytes += total_bytes;
+	tx_ring->tx_stats.packets += total_packets;
+	u64_stats_update_end(&tx_ring->tx_syncp);
+	tx_ring->total_bytes += total_bytes;
+	tx_ring->total_packets += total_packets;
 
-	if (unlikely(count &&
-		     netif_carrier_ok(netdev) &&
-		     igb_desc_unused(tx_ring) >= IGB_TX_QUEUE_WAKE)) {
-		/* Make sure that anybody stopping the queue after this
-		 * sees the new next_to_clean.
-		 */
-		smp_mb();
-		if (__netif_subqueue_stopped(netdev, tx_ring->queue_index) &&
-		    !(test_bit(__IGB_DOWN, &adapter->state))) {
-			netif_wake_subqueue(netdev, tx_ring->queue_index);
+	if (tx_ring->detect_tx_hung) {
+		struct e1000_hw *hw = &adapter->hw;
+		u16 eop = tx_ring->buffer_info[i].next_to_watch;
+		union e1000_adv_tx_desc *eop_desc;
 
-			u64_stats_update_begin(&tx_ring->tx_syncp);
-			tx_ring->tx_stats.restart_queue++;
-			u64_stats_update_end(&tx_ring->tx_syncp);
-		}
-	}
+		eop_desc = IGB_TX_DESC(tx_ring, eop);
 
-	if (tx_ring->detect_tx_hung) {
 		/* Detect a transmit hang in hardware, this serializes the
 		 * check with the clearing of time_stamp and movement of i */
 		tx_ring->detect_tx_hung = false;
@@ -5666,16 +5678,34 @@ static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
 				eop,
 				jiffies,
 				eop_desc->wb.status);
-			netif_stop_subqueue(netdev, tx_ring->queue_index);
+			netif_stop_subqueue(tx_ring->netdev,
+					    tx_ring->queue_index);
+
+			/* we are about to reset, no point in enabling stuff */
+			return true;
 		}
 	}
-	tx_ring->total_bytes += total_bytes;
-	tx_ring->total_packets += total_packets;
-	u64_stats_update_begin(&tx_ring->tx_syncp);
-	tx_ring->tx_stats.bytes += total_bytes;
-	tx_ring->tx_stats.packets += total_packets;
-	u64_stats_update_end(&tx_ring->tx_syncp);
-	return count < tx_ring->count;
+
+	if (unlikely(total_packets &&
+		     netif_carrier_ok(tx_ring->netdev) &&
+		     igb_desc_unused(tx_ring) >= IGB_TX_QUEUE_WAKE)) {
+		/* Make sure that anybody stopping the queue after this
+		 * sees the new next_to_clean.
+		 */
+		smp_mb();
+		if (__netif_subqueue_stopped(tx_ring->netdev,
+					     tx_ring->queue_index) &&
+		    !(test_bit(__IGB_DOWN, &adapter->state))) {
+			netif_wake_subqueue(tx_ring->netdev,
+					    tx_ring->queue_index);
+
+			u64_stats_update_begin(&tx_ring->tx_syncp);
+			tx_ring->tx_stats.restart_queue++;
+			u64_stats_update_end(&tx_ring->tx_syncp);
+		}
+	}
+
+	return !!budget;
 }
 
 static inline void igb_rx_checksum(struct igb_ring *ring,
-- 
1.7.6

  parent reply	other threads:[~2011-09-17  8:04 UTC|newest]

Thread overview: 36+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2011-09-17  8:04 [net-next 00/13][pull request] Intel Wired LAN Driver Updates Jeff Kirsher
2011-09-17  8:04 ` [net-next 01/13] ixgb: eliminate checkstack warnings Jeff Kirsher
2011-09-17  8:43   ` Joe Perches
2011-09-19 21:36     ` Jesse Brandeburg
2011-09-19 22:28       ` [PATCH] intel: Convert <FOO>_LENGTH_OF_ADDRESS to ETH_ALEN Joe Perches
2011-09-19 23:33         ` Jeff Kirsher
2011-09-17  8:04 ` [net-next 02/13] igb: Update RXDCTL/TXDCTL configurations Jeff Kirsher
2011-09-17  8:04 ` [net-next 03/13] igb: Update max_frame_size to account for an optional VLAN tag if present Jeff Kirsher
2011-09-17  8:04 ` [net-next 04/13] igb: drop support for single buffer mode Jeff Kirsher
2011-09-17  8:04 ` [net-next 05/13] igb: streamline Rx buffer allocation and cleanup Jeff Kirsher
2011-09-17  8:04 ` [net-next 06/13] igb: update ring and adapter structure to improve performance Jeff Kirsher
2011-09-17  8:04 ` [net-next 07/13] igb: Refactor clean_rx_irq to reduce overhead and " Jeff Kirsher
2011-09-17  8:04 ` [net-next 08/13] igb: drop the "adv" off function names relating to descriptors Jeff Kirsher
2011-09-17  8:04 ` [net-next 09/13] igb: Replace E1000_XX_DESC_ADV with IGB_XX_DESC Jeff Kirsher
2011-09-17  8:04 ` [net-next 10/13] igb: Remove multi_tx_table and simplify igb_xmit_frame Jeff Kirsher
2011-09-17  8:04 ` Jeff Kirsher [this message]
2011-09-17 17:04   ` [net-next 11/13] igb: Make Tx budget for NAPI user adjustable Ben Hutchings
2011-09-19 15:48     ` Alexander Duyck
2011-09-19 16:05       ` Ben Hutchings
2011-09-19 16:32         ` Alexander Duyck
2011-09-19 21:00           ` David Miller
2011-09-19 22:27             ` Alexander Duyck
2011-09-19 23:36               ` Jeff Kirsher
2011-09-19 23:42                 ` Stephen Hemminger
2011-09-19 23:47                   ` David Miller
2011-09-20  0:10                   ` Ben Hutchings
2011-09-20 18:59                     ` Andy Gospodarek
2011-09-20 20:23                       ` Neil Horman
2011-09-27 23:45                         ` Ben Hutchings
2011-09-28 11:00                           ` Neil Horman
2011-09-28 15:11                             ` Stephen Hemminger
2011-09-28 17:07                               ` Neil Horman
2011-09-19 20:56         ` David Miller
2011-09-19 20:57   ` David Miller
2011-09-17  8:04 ` [net-next 12/13] igb: split buffer_info into tx_buffer_info and rx_buffer_info Jeff Kirsher
2011-09-17  8:04 ` [net-next 13/13] igb: Consolidate creation of Tx context descriptors into a single function Jeff Kirsher

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1316246677-8830-12-git-send-email-jeffrey.t.kirsher@intel.com \
    --to=jeffrey.t.kirsher@intel.com \
    --cc=alexander.h.duyck@intel.com \
    --cc=davem@davemloft.net \
    --cc=gospo@redhat.com \
    --cc=netdev@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).