linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
To: linux-kernel@vger.kernel.org
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>,
	stable@vger.kernel.org, Bruce Allan <bruce.w.allan@intel.com>,
	Jeff Pieper <jeffrey.e.pieper@intel.com>,
	Jeff Kirsher <jeffrey.t.kirsher@intel.com>,
	"David S. Miller" <davem@davemloft.net>
Subject: [ 12/61] e1000e: enable ECC on I217/I218 to catch packet buffer memory errors
Date: Tue, 12 Feb 2013 12:34:32 -0800	[thread overview]
Message-ID: <20130212203419.852886625@linuxfoundation.org> (raw)
In-Reply-To: <20130212203417.890993903@linuxfoundation.org>

3.7-stable review patch.  If anyone has any objections, please let me know.

------------------

From: Bruce Allan <bruce.w.allan@intel.com>

commit 286003048aaef49b26bb9d93611dc69085e8982e upstream.

In rare instances, memory errors have been detected in the internal packet
buffer memory on I217/I218 when stressed under certain environmental
conditions.  Enable Error Correcting Code (ECC) in hardware to catch both
correctable and uncorrectable errors.  Correctable errors will be handled
by the hardware.  Uncorrectable errors in the packet buffer will cause the
packet to be received with an error indication in the buffer descriptor
causing the packet to be discarded.  If the uncorrectable error is in the
descriptor itself, the hardware will stop and interrupt the driver
indicating the error.  The driver will then reset the hardware in order to
clear the error and restart.

Both types of errors will be accounted for in statistics counters.

Signed-off-by: Bruce Allan <bruce.w.allan@intel.com>
Tested-by: Jeff Pieper <jeffrey.e.pieper@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

---
 drivers/net/ethernet/intel/e1000e/defines.h |    9 +++++
 drivers/net/ethernet/intel/e1000e/e1000.h   |    2 +
 drivers/net/ethernet/intel/e1000e/ethtool.c |    2 +
 drivers/net/ethernet/intel/e1000e/hw.h      |    1 
 drivers/net/ethernet/intel/e1000e/ich8lan.c |   11 ++++++
 drivers/net/ethernet/intel/e1000e/netdev.c  |   46 ++++++++++++++++++++++++++++
 6 files changed, 71 insertions(+)

--- a/drivers/net/ethernet/intel/e1000e/defines.h
+++ b/drivers/net/ethernet/intel/e1000e/defines.h
@@ -233,6 +233,7 @@
 #define E1000_CTRL_FRCDPX   0x00001000  /* Force Duplex */
 #define E1000_CTRL_LANPHYPC_OVERRIDE 0x00010000 /* SW control of LANPHYPC */
 #define E1000_CTRL_LANPHYPC_VALUE    0x00020000 /* SW value of LANPHYPC */
+#define E1000_CTRL_MEHE     0x00080000  /* Memory Error Handling Enable */
 #define E1000_CTRL_SWDPIN0  0x00040000  /* SWDPIN 0 value */
 #define E1000_CTRL_SWDPIN1  0x00080000  /* SWDPIN 1 value */
 #define E1000_CTRL_SWDPIO0  0x00400000  /* SWDPIN 0 Input or output */
@@ -391,6 +392,12 @@
 
 #define E1000_PBS_16K E1000_PBA_16K
 
+/* Uncorrectable/correctable ECC Error counts and enable bits */
+#define E1000_PBECCSTS_CORR_ERR_CNT_MASK	0x000000FF
+#define E1000_PBECCSTS_UNCORR_ERR_CNT_MASK	0x0000FF00
+#define E1000_PBECCSTS_UNCORR_ERR_CNT_SHIFT	8
+#define E1000_PBECCSTS_ECC_ENABLE		0x00010000
+
 #define IFS_MAX       80
 #define IFS_MIN       40
 #define IFS_RATIO     4
@@ -410,6 +417,7 @@
 #define E1000_ICR_RXSEQ         0x00000008 /* Rx sequence error */
 #define E1000_ICR_RXDMT0        0x00000010 /* Rx desc min. threshold (0) */
 #define E1000_ICR_RXT0          0x00000080 /* Rx timer intr (ring 0) */
+#define E1000_ICR_ECCER         0x00400000 /* Uncorrectable ECC Error */
 #define E1000_ICR_INT_ASSERTED  0x80000000 /* If this bit asserted, the driver should claim the interrupt */
 #define E1000_ICR_RXQ0          0x00100000 /* Rx Queue 0 Interrupt */
 #define E1000_ICR_RXQ1          0x00200000 /* Rx Queue 1 Interrupt */
@@ -446,6 +454,7 @@
 #define E1000_IMS_RXSEQ     E1000_ICR_RXSEQ     /* Rx sequence error */
 #define E1000_IMS_RXDMT0    E1000_ICR_RXDMT0    /* Rx desc min. threshold */
 #define E1000_IMS_RXT0      E1000_ICR_RXT0      /* Rx timer intr */
+#define E1000_IMS_ECCER     E1000_ICR_ECCER     /* Uncorrectable ECC Error */
 #define E1000_IMS_RXQ0      E1000_ICR_RXQ0      /* Rx Queue 0 Interrupt */
 #define E1000_IMS_RXQ1      E1000_ICR_RXQ1      /* Rx Queue 1 Interrupt */
 #define E1000_IMS_TXQ0      E1000_ICR_TXQ0      /* Tx Queue 0 Interrupt */
--- a/drivers/net/ethernet/intel/e1000e/e1000.h
+++ b/drivers/net/ethernet/intel/e1000e/e1000.h
@@ -314,6 +314,8 @@ struct e1000_adapter {
 
 	struct napi_struct napi;
 
+	unsigned int uncorr_errors;	/* uncorrectable ECC errors */
+	unsigned int corr_errors;	/* correctable ECC errors */
 	unsigned int restart_queue;
 	u32 txd_cmd;
 
--- a/drivers/net/ethernet/intel/e1000e/ethtool.c
+++ b/drivers/net/ethernet/intel/e1000e/ethtool.c
@@ -108,6 +108,8 @@ static const struct e1000_stats e1000_gs
 	E1000_STAT("dropped_smbus", stats.mgpdc),
 	E1000_STAT("rx_dma_failed", rx_dma_failed),
 	E1000_STAT("tx_dma_failed", tx_dma_failed),
+	E1000_STAT("uncorr_ecc_errors", uncorr_errors),
+	E1000_STAT("corr_ecc_errors", corr_errors),
 };
 
 #define E1000_GLOBAL_STATS_LEN	ARRAY_SIZE(e1000_gstrings_stats)
--- a/drivers/net/ethernet/intel/e1000e/hw.h
+++ b/drivers/net/ethernet/intel/e1000e/hw.h
@@ -77,6 +77,7 @@ enum e1e_registers {
 #define E1000_POEMB	E1000_PHY_CTRL	/* PHY OEM Bits */
 	E1000_PBA      = 0x01000, /* Packet Buffer Allocation - RW */
 	E1000_PBS      = 0x01008, /* Packet Buffer Size */
+	E1000_PBECCSTS = 0x0100C, /* Packet Buffer ECC Status - RW */
 	E1000_EEMNGCTL = 0x01010, /* MNG EEprom Control */
 	E1000_EEWR     = 0x0102C, /* EEPROM Write Register - RW */
 	E1000_FLOP     = 0x0103C, /* FLASH Opcode Register */
--- a/drivers/net/ethernet/intel/e1000e/ich8lan.c
+++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c
@@ -3690,6 +3690,17 @@ static void e1000_initialize_hw_bits_ich
 	if (hw->mac.type == e1000_ich8lan)
 		reg |= (E1000_RFCTL_IPV6_EX_DIS | E1000_RFCTL_NEW_IPV6_EXT_DIS);
 	ew32(RFCTL, reg);
+
+	/* Enable ECC on Lynxpoint */
+	if (hw->mac.type == e1000_pch_lpt) {
+		reg = er32(PBECCSTS);
+		reg |= E1000_PBECCSTS_ECC_ENABLE;
+		ew32(PBECCSTS, reg);
+
+		reg = er32(CTRL);
+		reg |= E1000_CTRL_MEHE;
+		ew32(CTRL, reg);
+	}
 }
 
 /**
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -1687,6 +1687,23 @@ static irqreturn_t e1000_intr_msi(int ir
 			mod_timer(&adapter->watchdog_timer, jiffies + 1);
 	}
 
+	/* Reset on uncorrectable ECC error */
+	if ((icr & E1000_ICR_ECCER) && (hw->mac.type == e1000_pch_lpt)) {
+		u32 pbeccsts = er32(PBECCSTS);
+
+		adapter->corr_errors +=
+		    pbeccsts & E1000_PBECCSTS_CORR_ERR_CNT_MASK;
+		adapter->uncorr_errors +=
+		    (pbeccsts & E1000_PBECCSTS_UNCORR_ERR_CNT_MASK) >>
+		    E1000_PBECCSTS_UNCORR_ERR_CNT_SHIFT;
+
+		/* Do the reset outside of interrupt context */
+		schedule_work(&adapter->reset_task);
+
+		/* return immediately since reset is imminent */
+		return IRQ_HANDLED;
+	}
+
 	if (napi_schedule_prep(&adapter->napi)) {
 		adapter->total_tx_bytes = 0;
 		adapter->total_tx_packets = 0;
@@ -1754,6 +1771,23 @@ static irqreturn_t e1000_intr(int irq, v
 			mod_timer(&adapter->watchdog_timer, jiffies + 1);
 	}
 
+	/* Reset on uncorrectable ECC error */
+	if ((icr & E1000_ICR_ECCER) && (hw->mac.type == e1000_pch_lpt)) {
+		u32 pbeccsts = er32(PBECCSTS);
+
+		adapter->corr_errors +=
+		    pbeccsts & E1000_PBECCSTS_CORR_ERR_CNT_MASK;
+		adapter->uncorr_errors +=
+		    (pbeccsts & E1000_PBECCSTS_UNCORR_ERR_CNT_MASK) >>
+		    E1000_PBECCSTS_UNCORR_ERR_CNT_SHIFT;
+
+		/* Do the reset outside of interrupt context */
+		schedule_work(&adapter->reset_task);
+
+		/* return immediately since reset is imminent */
+		return IRQ_HANDLED;
+	}
+
 	if (napi_schedule_prep(&adapter->napi)) {
 		adapter->total_tx_bytes = 0;
 		adapter->total_tx_packets = 0;
@@ -2117,6 +2151,8 @@ static void e1000_irq_enable(struct e100
 	if (adapter->msix_entries) {
 		ew32(EIAC_82574, adapter->eiac_mask & E1000_EIAC_MASK_82574);
 		ew32(IMS, adapter->eiac_mask | E1000_IMS_OTHER | E1000_IMS_LSC);
+	} else if (hw->mac.type == e1000_pch_lpt) {
+		ew32(IMS, IMS_ENABLE_MASK | E1000_IMS_ECCER);
 	} else {
 		ew32(IMS, IMS_ENABLE_MASK);
 	}
@@ -4297,6 +4333,16 @@ static void e1000e_update_stats(struct e
 	adapter->stats.mgptc += er32(MGTPTC);
 	adapter->stats.mgprc += er32(MGTPRC);
 	adapter->stats.mgpdc += er32(MGTPDC);
+
+	/* Correctable ECC Errors */
+	if (hw->mac.type == e1000_pch_lpt) {
+		u32 pbeccsts = er32(PBECCSTS);
+		adapter->corr_errors +=
+		    pbeccsts & E1000_PBECCSTS_CORR_ERR_CNT_MASK;
+		adapter->uncorr_errors +=
+		    (pbeccsts & E1000_PBECCSTS_UNCORR_ERR_CNT_MASK) >>
+		    E1000_PBECCSTS_UNCORR_ERR_CNT_SHIFT;
+	}
 }
 
 /**



  parent reply	other threads:[~2013-02-12 21:09 UTC|newest]

Thread overview: 70+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-02-12 20:34 [ 00/61] 3.7.8-stable review Greg Kroah-Hartman
2013-02-12 20:34 ` [ 01/61] rtlwifi: Fix the usage of the wrong variable in usb.c Greg Kroah-Hartman
2013-02-12 20:34 ` [ 02/61] rtlwifi: Fix scheduling while atomic bug Greg Kroah-Hartman
2013-02-12 20:34 ` [ 03/61] regulator: max8998: fix incorrect min_uV value for ldo10 Greg Kroah-Hartman
2013-02-12 20:34 ` [ 04/61] regulator: clear state each invocation of of_regulator_match Greg Kroah-Hartman
2013-02-12 20:34 ` [ 05/61] regulator: s2mps11: fix incorrect register for buck10 Greg Kroah-Hartman
2013-02-12 20:34 ` [ 06/61] IB/qib: Fix for broken sparse warning fix Greg Kroah-Hartman
2013-02-12 20:34 ` [ 07/61] virtio_console: Dont access uninitialized data Greg Kroah-Hartman
2013-02-12 20:34 ` [ 08/61] Bluetooth: Fix handling of unexpected SMP PDUs Greg Kroah-Hartman
2013-02-12 20:34 ` [ 09/61] Revert "iwlwifi: fix the reclaimed packet tracking upon flush queue" Greg Kroah-Hartman
2013-02-12 20:34 ` [ 10/61] can: c_can: Set reserved bit in IFx_MASK2 to 1 on write Greg Kroah-Hartman
2013-02-12 20:34 ` [ 11/61] mwifiex: fix incomplete scan in case of IE parsing error Greg Kroah-Hartman
2013-02-12 20:34 ` Greg Kroah-Hartman [this message]
2013-02-12 20:34 ` [ 13/61] media: pwc-if: must check vb2_queue_init() success Greg Kroah-Hartman
2013-02-12 20:34 ` [ 14/61] ath9k_hw: fix calibration issues on chainmask that dont include chain 0 Greg Kroah-Hartman
2013-02-12 20:34 ` [ 15/61] mfd: db8500-prcmu: Fix irqdomain usage Greg Kroah-Hartman
2013-02-12 20:34 ` [ 16/61] dm thin: fix queue limits stacking Greg Kroah-Hartman
2013-02-12 20:34 ` [ 17/61] net: prevent setting ttl=0 via IP_TTL Greg Kroah-Hartman
2013-02-12 20:34 ` [ 18/61] ipv6: fix the noflags test in addrconf_get_prefix_route Greg Kroah-Hartman
2013-02-12 20:34 ` [ 19/61] net, wireless: overwrite default_ethtool_ops Greg Kroah-Hartman
2013-02-12 20:34 ` [ 20/61] tcp: fix a panic on UP machines in reqsk_fastopen_remove Greg Kroah-Hartman
2013-02-12 20:34 ` [ 21/61] MAINTAINERS: Stephen Hemminger email change Greg Kroah-Hartman
2013-02-12 20:34 ` [ 22/61] ipv6: fix header length calculation in ip6_append_data() Greg Kroah-Hartman
2013-02-12 20:34 ` [ 23/61] macvlan: fix macvlan_get_size() Greg Kroah-Hartman
2013-02-12 20:34 ` [ 24/61] net: calxedaxgmac: throw away overrun frames Greg Kroah-Hartman
2013-02-12 20:34 ` [ 25/61] net/mlx4_en: Fix bridged vSwitch configuration for non SRIOV mode Greg Kroah-Hartman
2013-02-12 20:34 ` [ 26/61] net/mlx4_core: Set number of msix vectors under SRIOV mode to firmware defaults Greg Kroah-Hartman
2013-02-12 20:34 ` [ 27/61] tcp: fix incorrect LOCKDROPPEDICMPS counter Greg Kroah-Hartman
2013-02-12 20:34 ` [ 28/61] isdn/gigaset: fix zero size border case in debug dump Greg Kroah-Hartman
2013-02-12 20:34 ` [ 29/61] netxen: fix off by one bug in netxen_release_tx_buffer() Greg Kroah-Hartman
2013-02-12 20:34 ` [ 30/61] r8169: remove the obsolete and incorrect AMD workaround Greg Kroah-Hartman
2013-02-12 20:34 ` [ 31/61] net: loopback: fix a dst refcounting issue Greg Kroah-Hartman
2013-02-12 20:34 ` [ 32/61] IP_GRE: Fix kernel panic in IP_GRE with GRE csum Greg Kroah-Hartman
2013-02-12 20:34 ` [ 33/61] pktgen: correctly handle failures when adding a device Greg Kroah-Hartman
2013-02-12 20:34 ` [ 34/61] ipv6: do not create neighbor entries for local delivery Greg Kroah-Hartman
2013-02-12 20:34 ` [ 35/61] via-rhine: Fix bugs in NAPI support Greg Kroah-Hartman
2013-02-12 20:34 ` [ 36/61] packet: fix leakage of tx_ring memory Greg Kroah-Hartman
2013-02-12 20:34 ` [ 37/61] ipv6/ip6_gre: fix error case handling in ip6gre_tunnel_xmit() Greg Kroah-Hartman
2013-02-12 20:34 ` [ 38/61] atm/iphase: rename fregt_t -> ffreg_t Greg Kroah-Hartman
2013-02-12 20:34 ` [ 39/61] xen/netback: shutdown the ring if it contains garbage Greg Kroah-Hartman
2013-02-12 20:35 ` [ 40/61] xen/netback: dont leak pages on failure in xen_netbk_tx_check_gop Greg Kroah-Hartman
2013-02-12 20:35 ` [ 41/61] xen/netback: free already allocated memory on failure in xen_netbk_get_requests Greg Kroah-Hartman
2013-02-12 20:35 ` [ 42/61] netback: correct netbk_tx_err to handle wrap around Greg Kroah-Hartman
2013-02-12 20:35 ` [ 43/61] ipv4: Remove output route check in ipv4_mtu Greg Kroah-Hartman
2013-02-12 20:35 ` [ 44/61] ipv4: Dont update the pmtu on mtu locked routes Greg Kroah-Hartman
2013-02-12 20:35 ` [ 45/61] ipv6: Add an error handler for icmp6 Greg Kroah-Hartman
2013-02-12 20:35 ` [ 46/61] ipv4: Invalidate the socket cached route on pmtu events if possible Greg Kroah-Hartman
2013-02-12 20:35 ` [ 47/61] ipv4: Add a socket release callback for datagram sockets Greg Kroah-Hartman
2013-02-12 20:35 ` [ 48/61] ipv4: Fix route refcount on pmtu discovery Greg Kroah-Hartman
2013-02-12 20:35 ` [ 49/61] sctp: refactor sctp_outq_teardown to insure proper re-initalization Greg Kroah-Hartman
2013-02-12 20:35 ` [ 50/61] net: sctp: sctp_setsockopt_auth_key: use kzfree instead of kfree Greg Kroah-Hartman
2013-02-12 20:35 ` [ 51/61] net: sctp: sctp_endpoint_free: zero out secret key data Greg Kroah-Hartman
2013-02-12 20:35 ` [ 52/61] tcp: detect SYN/data drop when F-RTO is disabled Greg Kroah-Hartman
2013-02-12 20:35 ` [ 53/61] tcp: fix an infinite loop in tcp_slow_start() Greg Kroah-Hartman
2013-02-12 20:35 ` [ 54/61] tcp: frto should not set snd_cwnd to 0 Greg Kroah-Hartman
2013-02-12 20:35 ` [ 55/61] tcp: fix for zero packets_in_flight was too broad Greg Kroah-Hartman
2013-02-12 20:35 ` [ 56/61] tcp: dont abort splice() after small transfers Greg Kroah-Hartman
2013-02-12 20:35 ` [ 57/61] tcp: splice: fix an infinite loop in tcp_read_sock() Greg Kroah-Hartman
2013-02-12 20:35 ` [ 58/61] tcp: fix splice() and tcp collapsing interaction Greg Kroah-Hartman
2013-02-12 20:35 ` [ 59/61] net: splice: avoid high order page splitting Greg Kroah-Hartman
2013-02-12 20:35 ` [ 60/61] net: splice: fix __splice_segment() Greg Kroah-Hartman
2013-02-12 20:35 ` [ 61/61] drm/nouveau: add lockdep annotations Greg Kroah-Hartman
2013-02-13  3:35   ` Peter Hurley
2013-02-13  9:33     ` Arend van Spriel
2013-02-13  9:43       ` Ben Skeggs
2013-02-13  9:52         ` Arend van Spriel
2013-02-13 17:46         ` Marcin Slusarz
2013-02-13 18:38           ` Marcin Slusarz
2013-02-13  7:06 ` [ 00/61] 3.7.8-stable review Satoru Takeuchi
2013-02-13 15:51 ` Shuah Khan

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20130212203419.852886625@linuxfoundation.org \
    --to=gregkh@linuxfoundation.org \
    --cc=bruce.w.allan@intel.com \
    --cc=davem@davemloft.net \
    --cc=jeffrey.e.pieper@intel.com \
    --cc=jeffrey.t.kirsher@intel.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=stable@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).