linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: "Bryan O'Sullivan" <bos@pathscale.com>
To: rdreier@cisco.com
Cc: openib-general@openfabrics.org, linux-kernel@vger.kernel.org
Subject: [PATCH 11 of 33] IB/ipath - Change packet problems vs chip errors handling and reporting
Date: Thu, 15 Mar 2007 14:44:55 -0700	[thread overview]
Message-ID: <c793dc8a526564b73018.1173995095@iqa-25.internal.keyresearch.com> (raw)
In-Reply-To: <patchbomb.1173995084@iqa-25.internal.keyresearch.com>

# HG changeset patch
# User Bryan O'Sullivan <bos@pathscale.com>
# Date 1173994464 25200
# Node ID c793dc8a526564b73018924a707bcb21052f8f36
# Parent  4050989280f08d81d06642e3d6cf5c3ea4397107
IB/ipath - Change packet problems vs chip errors handling and reporting

Some types of packet errors are moderately common with longer IB
cables and large clusters, and are not reported with prints by
other IB HCA drivers.  This patch suppresses those messages unless
the new __IPATH_ERRPKTDBG bit is set in ipath_debug.  The reporting
of frequent error interrupts that have been temporarily disabled is
also made clearer.

The wording of the messages now also distinguishes chip errors from
bad packets sent or received.

Signed-off-by: Dave Olson <dave.olson@qlogic.com>
Signed-off-by: Bryan O'Sullivan <bryan.osullivan@qlogic.com>

diff -r 4050989280f0 -r c793dc8a5265 drivers/infiniband/hw/ipath/ipath_debug.h
--- a/drivers/infiniband/hw/ipath/ipath_debug.h	Thu Mar 15 14:34:24 2007 -0700
+++ b/drivers/infiniband/hw/ipath/ipath_debug.h	Thu Mar 15 14:34:24 2007 -0700
@@ -57,6 +57,7 @@
 #define __IPATH_PROCDBG     0x100
 /* print mmap/nopage stuff, not using VDBG any more */
 #define __IPATH_MMDBG       0x200
+#define __IPATH_ERRPKTDBG   0x400
 #define __IPATH_USER_SEND   0x1000	/* use user mode send */
 #define __IPATH_KERNEL_SEND 0x2000	/* use kernel mode send */
 #define __IPATH_EPKTDBG     0x4000	/* print ethernet packet data */
diff -r 4050989280f0 -r c793dc8a5265 drivers/infiniband/hw/ipath/ipath_driver.c
--- a/drivers/infiniband/hw/ipath/ipath_driver.c	Thu Mar 15 14:34:24 2007 -0700
+++ b/drivers/infiniband/hw/ipath/ipath_driver.c	Thu Mar 15 14:34:24 2007 -0700
@@ -754,9 +754,42 @@ static int ipath_wait_linkstate(struct i
 	return (dd->ipath_flags & state) ? 0 : -ETIMEDOUT;
 }
 
-void ipath_decode_err(char *buf, size_t blen, ipath_err_t err)
-{
+/*
+ * Decode the error status into strings, deciding whether to always
+ * print it or not depending on "normal packet errors" vs everything
+ * else.  Return 1 if "real" errors, otherwise 0 if only packet
+ * errors, so caller can decide what to print with the string.
+ */
+int ipath_decode_err(char *buf, size_t blen, ipath_err_t err)
+{
+	int iserr = 1;
 	*buf = '\0';
+	if (err & INFINIPATH_E_PKTERRS) {
+		if (!(err & ~INFINIPATH_E_PKTERRS))
+			iserr = 0; // if only packet errors.
+		if (ipath_debug & __IPATH_ERRPKTDBG) {
+			if (err & INFINIPATH_E_REBP)
+				strlcat(buf, "EBP ", blen);
+			if (err & INFINIPATH_E_RVCRC)
+				strlcat(buf, "VCRC ", blen);
+			if (err & INFINIPATH_E_RICRC) {
+				strlcat(buf, "CRC ", blen);
+				/* clear bit so check below won't repeat it */
+				err &= ~INFINIPATH_E_RICRC;
+			}
+			if (err & INFINIPATH_E_RSHORTPKTLEN)
+				strlcat(buf, "rshortpktlen ", blen);
+			if (err & INFINIPATH_E_SDROPPEDDATAPKT)
+				strlcat(buf, "sdroppeddatapkt ", blen);
+			if (err & INFINIPATH_E_SPKTLEN)
+				strlcat(buf, "spktlen ", blen);
+		}
+		if ((err & INFINIPATH_E_RICRC) &&
+			!(err&(INFINIPATH_E_RVCRC|INFINIPATH_E_REBP)))
+			strlcat(buf, "CRC ", blen);
+		if (!iserr)
+			goto done;
+	}
 	if (err & INFINIPATH_E_RHDRLEN)
 		strlcat(buf, "rhdrlen ", blen);
 	if (err & INFINIPATH_E_RBADTID)
@@ -767,12 +800,12 @@ void ipath_decode_err(char *buf, size_t 
 		strlcat(buf, "rhdr ", blen);
 	if (err & INFINIPATH_E_RLONGPKTLEN)
 		strlcat(buf, "rlongpktlen ", blen);
-	if (err & INFINIPATH_E_RSHORTPKTLEN)
-		strlcat(buf, "rshortpktlen ", blen);
 	if (err & INFINIPATH_E_RMAXPKTLEN)
 		strlcat(buf, "rmaxpktlen ", blen);
 	if (err & INFINIPATH_E_RMINPKTLEN)
 		strlcat(buf, "rminpktlen ", blen);
+	if (err & INFINIPATH_E_SMINPKTLEN)
+		strlcat(buf, "sminpktlen ", blen);
 	if (err & INFINIPATH_E_RFORMATERR)
 		strlcat(buf, "rformaterr ", blen);
 	if (err & INFINIPATH_E_RUNSUPVL)
@@ -781,32 +814,20 @@ void ipath_decode_err(char *buf, size_t 
 		strlcat(buf, "runexpchar ", blen);
 	if (err & INFINIPATH_E_RIBFLOW)
 		strlcat(buf, "ribflow ", blen);
-	if (err & INFINIPATH_E_REBP)
-		strlcat(buf, "EBP ", blen);
 	if (err & INFINIPATH_E_SUNDERRUN)
 		strlcat(buf, "sunderrun ", blen);
 	if (err & INFINIPATH_E_SPIOARMLAUNCH)
 		strlcat(buf, "spioarmlaunch ", blen);
 	if (err & INFINIPATH_E_SUNEXPERRPKTNUM)
 		strlcat(buf, "sunexperrpktnum ", blen);
-	if (err & INFINIPATH_E_SDROPPEDDATAPKT)
-		strlcat(buf, "sdroppeddatapkt ", blen);
 	if (err & INFINIPATH_E_SDROPPEDSMPPKT)
 		strlcat(buf, "sdroppedsmppkt ", blen);
 	if (err & INFINIPATH_E_SMAXPKTLEN)
 		strlcat(buf, "smaxpktlen ", blen);
-	if (err & INFINIPATH_E_SMINPKTLEN)
-		strlcat(buf, "sminpktlen ", blen);
 	if (err & INFINIPATH_E_SUNSUPVL)
 		strlcat(buf, "sunsupVL ", blen);
-	if (err & INFINIPATH_E_SPKTLEN)
-		strlcat(buf, "spktlen ", blen);
 	if (err & INFINIPATH_E_INVALIDADDR)
 		strlcat(buf, "invalidaddr ", blen);
-	if (err & INFINIPATH_E_RICRC)
-		strlcat(buf, "CRC ", blen);
-	if (err & INFINIPATH_E_RVCRC)
-		strlcat(buf, "VCRC ", blen);
 	if (err & INFINIPATH_E_RRCVEGRFULL)
 		strlcat(buf, "rcvegrfull ", blen);
 	if (err & INFINIPATH_E_RRCVHDRFULL)
@@ -819,6 +840,8 @@ void ipath_decode_err(char *buf, size_t 
 		strlcat(buf, "hardware ", blen);
 	if (err & INFINIPATH_E_RESET)
 		strlcat(buf, "reset ", blen);
+done:
+	return iserr;
 }
 
 /**
diff -r 4050989280f0 -r c793dc8a5265 drivers/infiniband/hw/ipath/ipath_intr.c
--- a/drivers/infiniband/hw/ipath/ipath_intr.c	Thu Mar 15 14:34:24 2007 -0700
+++ b/drivers/infiniband/hw/ipath/ipath_intr.c	Thu Mar 15 14:34:24 2007 -0700
@@ -403,10 +403,13 @@ static void handle_supp_msgs(struct ipat
 	 * happens so often we never want to count it.
 	 */
 	if (dd->ipath_lasterror & ~INFINIPATH_E_IBSTATUSCHANGED) {
-		ipath_decode_err(msg, sizeof msg, dd->ipath_lasterror &
-				 ~INFINIPATH_E_IBSTATUSCHANGED);
+		int iserr;
+		iserr = ipath_decode_err(msg, sizeof msg,
+				dd->ipath_lasterror &
+				~INFINIPATH_E_IBSTATUSCHANGED);
 		if (dd->ipath_lasterror &
-		    ~(INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL))
+		    	~(INFINIPATH_E_RRCVEGRFULL |
+			INFINIPATH_E_RRCVHDRFULL | INFINIPATH_E_PKTERRS))
 			ipath_dev_err(dd, "Suppressed %u messages for "
 				      "fast-repeating errors (%s) (%llx)\n",
 				      supp_msgs, msg,
@@ -420,8 +423,13 @@ static void handle_supp_msgs(struct ipat
 			 * them. So only complain about these at debug
 			 * level.
 			 */
-			ipath_dbg("Suppressed %u messages for %s\n",
-				  supp_msgs, msg);
+			if (iserr)
+				ipath_dbg("Suppressed %u messages for %s\n",
+					  supp_msgs, msg);
+			else
+				ipath_cdbg(ERRPKT,
+					"Suppressed %u messages for %s\n",
+					  supp_msgs, msg);
 		}
 	}
 }
@@ -462,7 +470,7 @@ static int handle_errors(struct ipath_de
 {
 	char msg[512];
 	u64 ignore_this_time = 0;
-	int i;
+	int i, iserr = 0;
 	int chkerrpkts = 0, noprint = 0;
 	unsigned supp_msgs;
 
@@ -502,6 +510,7 @@ static int handle_errors(struct ipath_de
 	}
 
 	if (supp_msgs == 250000) {
+		int s_iserr;
 		/*
 		 * It's not entirely reasonable assuming that the errors set
 		 * in the last clear period are all responsible for the
@@ -511,17 +520,17 @@ static int handle_errors(struct ipath_de
 		dd->ipath_maskederrs |= dd->ipath_lasterror | errs;
 		ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
 				 ~dd->ipath_maskederrs);
-		ipath_decode_err(msg, sizeof msg,
+		s_iserr = ipath_decode_err(msg, sizeof msg,
 				 (dd->ipath_maskederrs & ~dd->
 				  ipath_ignorederrs));
 
 		if ((dd->ipath_maskederrs & ~dd->ipath_ignorederrs) &
-		    ~(INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL))
-			ipath_dev_err(dd, "Disabling error(s) %llx because "
-				      "occurring too frequently (%s)\n",
-				      (unsigned long long)
-				      (dd->ipath_maskederrs &
-				       ~dd->ipath_ignorederrs), msg);
+			~(INFINIPATH_E_RRCVEGRFULL |
+			INFINIPATH_E_RRCVHDRFULL | INFINIPATH_E_PKTERRS))
+			ipath_dev_err(dd, "Temporarily disabling "
+			    "error(s) %llx reporting; too frequent (%s)\n",
+				(unsigned long long) (dd->ipath_maskederrs &
+				~dd->ipath_ignorederrs), msg);
 		else {
 			/*
 			 * rcvegrfull and rcvhdrqfull are "normal",
@@ -530,8 +539,15 @@ static int handle_errors(struct ipath_de
 			 * processing them.  So only complain about
 			 * these at debug level.
 			 */
-			ipath_dbg("Disabling frequent queue full errors "
-				  "(%s)\n", msg);
+			if (s_iserr)
+				ipath_dbg("Temporarily disabling reporting "
+				    "too frequent queue full errors (%s)\n",
+				    msg);
+			else
+				ipath_cdbg(ERRPKT,
+				    "Temporarily disabling reporting too"
+				    " frequent packet errors (%s)\n",
+				    msg);
 		}
 
 		/*
@@ -589,6 +605,8 @@ static int handle_errors(struct ipath_de
 		ipath_stats.sps_crcerrs++;
 		chkerrpkts = 1;
 	}
+	/* collapse to 0/1: an int would drop the high bits of the mask */
+	iserr = !!(errs & ~(E_SUM_PKTERRS | INFINIPATH_E_PKTERRS));
 
 	/*
 	 * We don't want to print these two as they happen, or we can make
@@ -677,8 +695,13 @@ static int handle_errors(struct ipath_de
 		*dd->ipath_statusp &= ~IPATH_STATUS_IB_CONF;
 	}
 
-	if (!noprint && *msg)
-		ipath_dev_err(dd, "%s error\n", msg);
+	if (!noprint && *msg) {
+		if (iserr)
+			ipath_dev_err(dd, "%s error\n", msg);
+		else
+			dev_info(&dd->pcidev->dev, "%s packet problems\n",
+				msg);
+	}
 	if (dd->ipath_state_wanted & dd->ipath_flags) {
 		ipath_cdbg(VERBOSE, "driver wanted state %x, iflags now %x, "
 			   "waking\n", dd->ipath_state_wanted,
diff -r 4050989280f0 -r c793dc8a5265 drivers/infiniband/hw/ipath/ipath_kernel.h
--- a/drivers/infiniband/hw/ipath/ipath_kernel.h	Thu Mar 15 14:34:24 2007 -0700
+++ b/drivers/infiniband/hw/ipath/ipath_kernel.h	Thu Mar 15 14:34:24 2007 -0700
@@ -611,7 +611,7 @@ extern int ipath_diag_inuse;
 extern int ipath_diag_inuse;
 
 irqreturn_t ipath_intr(int irq, void *devid);
-void ipath_decode_err(char *buf, size_t blen, ipath_err_t err);
+int ipath_decode_err(char *buf, size_t blen, ipath_err_t err);
 #if __IPATH_INFO || __IPATH_DBG
 extern const char *ipath_ibcstatus_str[];
 #endif
diff -r 4050989280f0 -r c793dc8a5265 drivers/infiniband/hw/ipath/ipath_registers.h
--- a/drivers/infiniband/hw/ipath/ipath_registers.h	Thu Mar 15 14:34:24 2007 -0700
+++ b/drivers/infiniband/hw/ipath/ipath_registers.h	Thu Mar 15 14:34:24 2007 -0700
@@ -125,6 +125,15 @@
 #define INFINIPATH_E_INVALIDADDR     0x0002000000000000ULL
 #define INFINIPATH_E_RESET           0x0004000000000000ULL
 #define INFINIPATH_E_HARDWARE        0x0008000000000000ULL
+
+/*
+ * this is used to print "common" packet errors only when the
+ * __IPATH_ERRPKTDBG bit is set in ipath_debug.
+ */
+#define INFINIPATH_E_PKTERRS ( INFINIPATH_E_SPKTLEN \
+		| INFINIPATH_E_SDROPPEDDATAPKT | INFINIPATH_E_RVCRC \
+		| INFINIPATH_E_RICRC | INFINIPATH_E_RSHORTPKTLEN \
+		| INFINIPATH_E_REBP )
 
 /* kr_hwerrclear, kr_hwerrmask, kr_hwerrstatus, bits */
 /* TXEMEMPARITYERR bit 0: PIObuf, 1: PIOpbc, 2: launchfifo
diff -r 4050989280f0 -r c793dc8a5265 drivers/infiniband/hw/ipath/ipath_stats.c
--- a/drivers/infiniband/hw/ipath/ipath_stats.c	Thu Mar 15 14:34:24 2007 -0700
+++ b/drivers/infiniband/hw/ipath/ipath_stats.c	Thu Mar 15 14:34:24 2007 -0700
@@ -237,11 +237,13 @@ void ipath_get_faststats(unsigned long o
 	if ((dd->ipath_maskederrs & ~dd->ipath_ignorederrs)
 	    && time_after(jiffies, dd->ipath_unmasktime)) {
 		char ebuf[256];
-		ipath_decode_err(ebuf, sizeof ebuf,
+		int iserr;
+		iserr = ipath_decode_err(ebuf, sizeof ebuf,
 				 (dd->ipath_maskederrs & ~dd->
 				  ipath_ignorederrs));
 		if ((dd->ipath_maskederrs & ~dd->ipath_ignorederrs) &
-		    ~(INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL))
+				~(INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL |
+				INFINIPATH_E_PKTERRS ))
 			ipath_dev_err(dd, "Re-enabling masked errors "
 				      "(%s)\n", ebuf);
 		else {
@@ -252,8 +254,12 @@ void ipath_get_faststats(unsigned long o
 			 * them.  So only complain about these at debug
 			 * level.
 			 */
-			ipath_dbg("Disabling frequent queue full errors "
-				  "(%s)\n", ebuf);
+			if (iserr)
+					ipath_dbg("Re-enabling queue full errors (%s)\n",
+							ebuf);
+			else
+				ipath_cdbg(ERRPKT, "Re-enabling packet"
+						" problem interrupt (%s)\n", ebuf);
 		}
 		dd->ipath_maskederrs = dd->ipath_ignorederrs;
 		ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,

  parent reply	other threads:[~2007-03-15 22:19 UTC|newest]

Thread overview: 47+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2007-03-15 21:44 [PATCH 00 of 33] Set of ipath patches for 2.6.22 Bryan O'Sullivan
2007-03-15 21:44 ` [PATCH 01 of 33] IB/ipath - add ability to set and clear IB local loopback Bryan O'Sullivan
2007-03-19 21:22   ` Roland Dreier
2007-03-21 18:50     ` Bryan O'Sullivan
2007-03-15 21:44 ` [PATCH 02 of 33] IB/ipath - fix user memory region creation when IOMMU present Bryan O'Sullivan
2007-03-15 21:44 ` [PATCH 03 of 33] IB/ipath - definitions of two of RXE parity error bits were reversed Bryan O'Sullivan
2007-03-15 21:44 ` [PATCH 04 of 33] IB/ipath - don't initialize port memory for subports Bryan O'Sullivan
2007-03-15 21:44 ` [PATCH 05 of 33] IB/ipath - fix case where SRQ limit event causes CQ entry to be dropped Bryan O'Sullivan
2007-03-15 21:44 ` [PATCH 06 of 33] IB/ipath - NMI cpu lockup if local loopback used Bryan O'Sullivan
2007-03-15 21:44 ` [PATCH 07 of 33] IB/ipath - support larger IB_QP_MAX_DEST_RD_ATOMIC and IB_QP_MAX_QP_RD_ATOMIC Bryan O'Sullivan
2007-03-15 21:44 ` [PATCH 08 of 33] IB/ipath - fix up some debug messages Bryan O'Sullivan
2007-03-15 21:44 ` [PATCH 09 of 33] IB/ipath - fix QP error completion queue entries Bryan O'Sullivan
2007-03-15 21:44 ` [PATCH 10 of 33] IB/ipath - fix PSN update for RC retries Bryan O'Sullivan
2007-03-15 21:44 ` Bryan O'Sullivan [this message]
2007-03-15 21:44 ` [PATCH 12 of 33] IB/ipath - fix bad argument to clear_bit that trashed memory and/or crashed Bryan O'Sullivan
2007-03-19 21:24   ` Roland Dreier
2007-03-15 21:44 ` [PATCH 13 of 33] IB/ipath - Fix CQ flushing when QP is modified to error state Bryan O'Sullivan
2007-03-15 21:44 ` [PATCH 14 of 33] IB/ipath - fix port sharing on powerpc Bryan O'Sullivan
2007-03-19 21:27   ` Roland Dreier
2007-04-10 22:32   ` Roland Dreier
2007-03-15 21:44 ` [PATCH 15 of 33] IB/ipath - allow receive ports mapped into userspace to be shared Bryan O'Sullivan
2007-03-15 21:45 ` [PATCH 16 of 33] IB/ipath - fix RDMA reads of length zero and error handling Bryan O'Sullivan
2007-03-15 21:45 ` [PATCH 17 of 33] IB/ipath - remove unused register read routine ipath_read_kreg64_port() Bryan O'Sullivan
2007-03-15 21:45 ` [PATCH 18 of 33] IB/ipath - Fix calculation for number of kernel PIO buffers Bryan O'Sullivan
2007-03-15 21:45 ` [PATCH 19 of 33] IB/ipath - Discard multicast packets without a GRH Bryan O'Sullivan
2007-03-15 21:45 ` [PATCH 20 of 33] IB/ipath - call free_irq on chip specific initialization failure Bryan O'Sullivan
2007-03-15 21:45 ` [PATCH 21 of 33] IB/ipath - force PIOAvail update entry point Bryan O'Sullivan
2007-03-15 21:45 ` [PATCH 22 of 33] IB/ipath - print better error messages if kernel is misconfigured Bryan O'Sullivan
2007-03-15 21:45 ` [PATCH 23 of 33] IB/ipath - Improve handling and reporting of parity errors, mostly cleanup Bryan O'Sullivan
2007-03-15 21:45 ` [PATCH 24 of 33] IB/ipath - fix driver crash (in interrupt or during unload) after chip reset Bryan O'Sullivan
2007-03-15 21:45 ` [PATCH 25 of 33] IB/ipath - On unrecoverable errors, force link dow, LEDs off Bryan O'Sullivan
2007-03-15 21:45 ` [PATCH 26 of 33] IB/ipath - prevent random program use of diags interface Bryan O'Sullivan
2007-03-15 21:45 ` [PATCH 27 of 33] IB/ipath - cleaner shutdown at driver unload, disable IB link earlier Bryan O'Sullivan
2007-03-15 21:45 ` [PATCH 28 of 33] IB/ipath - Don't allow QP's 0 and 1 to be opened multiple times Bryan O'Sullivan
2007-03-15 21:45 ` [PATCH 29 of 33] IB/ipath - fix unit selection due to all cpu affinity bits set Bryan O'Sullivan
2007-03-15 21:45 ` [PATCH 30 of 33] IB/ipath - check reserved keys Bryan O'Sullivan
2007-03-15 21:45 ` [PATCH 31 of 33] IB/ipath - remove duplicate stuff from ipath_verbs.h Bryan O'Sullivan
2007-03-15 21:45 ` [PATCH 32 of 33] IB/ipath - check that a UD work request's address handle is valid Bryan O'Sullivan
2007-03-15 21:45 ` [PATCH 33 of 33] IB/ipath - fix drift between WCs in user and kernel space Bryan O'Sullivan
2007-03-19 21:23   ` Roland Dreier
2007-03-19 21:17 ` [PATCH 00 of 33] Set of ipath patches for 2.6.22 Roland Dreier
2007-04-10 22:30 ` Roland Dreier
2007-04-11  0:35   ` [ofa-general] " Roland Dreier
2007-04-11  0:48     ` Robert Walsh
2007-04-11 22:24   ` Robert Walsh
2007-04-11 22:33     ` Roland Dreier
2007-04-11 22:47       ` Robert Walsh

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=c793dc8a526564b73018.1173995095@iqa-25.internal.keyresearch.com \
    --to=bos@pathscale.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=openib-general@openfabrics.org \
    --cc=rdreier@cisco.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).