All of lore.kernel.org
 help / color / mirror / Atom feed
From: Dennis Dalessandro <dennis.dalessandro-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
To: dledford-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org
Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	Don Hiatt <don.hiatt-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>,
	Mike Marciniszyn
	<mike.marciniszyn-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
Subject: [PATCH 23/27] IB/hfi1: Add transmit fault injection feature
Date: Wed, 08 Feb 2017 05:28:07 -0800	[thread overview]
Message-ID: <20170208132806.16442.16539.stgit@scvm10.sc.intel.com> (raw)
In-Reply-To: <20170208132142.16442.69329.stgit-9QXIwq+3FY+1XWohqUldA0EOCMrvLtNR@public.gmane.org>

From: Don Hiatt <don.hiatt-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>

Add ability to fault packets on transmit by opcode.
Dropping by packet can be achived by setting the mask to 0.

In order to drop non-verbs traffic we set PbcInsertHrc
to NONE (0x2). The packet will still be delivered to
the receiving node but a KHdrHCRCErr (KDETH packet
with a bad HCRC) will be triggered and the packet will
not be delivered to the correct context.

In order to drop regular verbs traffic we set the
PbcTestEbp flag. The packet will still be delivered
to the receiving node but a 'late ebp error' will
be triggered and will be dropped.

A global toggle (/sys/kernel/debug/hfi1/hfi1_X/fault_suppress_err)
has been added to suppress the error messages on the receive
node when a packet was faulted on the sending node.

Signed-off-by: Mike Marciniszyn <mike.marciniszyn-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
Signed-off-by: Don Hiatt <don.hiatt-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
---
 drivers/infiniband/hw/hfi1/chip.c    |    4 +++
 drivers/infiniband/hw/hfi1/debugfs.c |    8 ++++++
 drivers/infiniband/hw/hfi1/debugfs.h |    6 ++++
 drivers/infiniband/hw/hfi1/driver.c  |   11 ++++++++
 drivers/infiniband/hw/hfi1/verbs.c   |   49 +++++++++++++++++++++++++++++-----
 drivers/infiniband/hw/hfi1/verbs.h   |    1 +
 include/rdma/ib_pack.h               |    2 +
 7 files changed, 74 insertions(+), 7 deletions(-)

diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c
index b9d491c..6e6a04b 100644
--- a/drivers/infiniband/hw/hfi1/chip.c
+++ b/drivers/infiniband/hw/hfi1/chip.c
@@ -64,6 +64,7 @@
 #include "platform.h"
 #include "aspm.h"
 #include "affinity.h"
+#include "debugfs.h"
 
 #define NUM_IB_PORTS 1
 
@@ -7897,6 +7898,9 @@ static void handle_dcc_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
 		reg &= ~DCC_ERR_FLG_EN_CSR_ACCESS_BLOCKED_HOST_SMASK;
 	}
 
+	if (unlikely(hfi1_dbg_fault_suppress_err(&dd->verbs_dev)))
+		reg &= ~DCC_ERR_FLG_LATE_EBP_ERR_SMASK;
+
 	/* report any remaining errors */
 	if (reg)
 		dd_dev_info_ratelimited(dd, "DCC Error: %s\n",
diff --git a/drivers/infiniband/hw/hfi1/debugfs.c b/drivers/infiniband/hw/hfi1/debugfs.c
index b66eb01..ed4a217 100644
--- a/drivers/infiniband/hw/hfi1/debugfs.c
+++ b/drivers/infiniband/hw/hfi1/debugfs.c
@@ -1257,6 +1257,11 @@ static int __init fault_init_debugfs(struct hfi1_ibdev *ibd)
 	return ret;
 }
 
+bool hfi1_dbg_fault_suppress_err(struct hfi1_ibdev *ibd)
+{
+	return ibd->fault_suppress_err;
+}
+
 bool hfi1_dbg_fault_opcode(struct rvt_qp *qp, u32 opcode, bool rx)
 {
 	bool ret = false;
@@ -1346,6 +1351,9 @@ void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd)
 		}
 
 #ifdef CONFIG_HFI1_FAULT_INJECTION
+	debugfs_create_bool("fault_suppress_err", 0600,
+			    ibd->hfi1_ibdev_dbg,
+			    &ibd->fault_suppress_err);
 	fault_init_debugfs(ibd);
 #endif
 }
diff --git a/drivers/infiniband/hw/hfi1/debugfs.h b/drivers/infiniband/hw/hfi1/debugfs.h
index 260bf46..181d125 100644
--- a/drivers/infiniband/hw/hfi1/debugfs.h
+++ b/drivers/infiniband/hw/hfi1/debugfs.h
@@ -75,6 +75,7 @@ struct fault_packet {
 
 bool hfi1_dbg_fault_opcode(struct rvt_qp *qp, u32 opcode, bool rx);
 bool hfi1_dbg_fault_packet(struct hfi1_packet *packet);
+bool hfi1_dbg_fault_suppress_err(struct hfi1_ibdev *ibd);
 #else
 static inline bool hfi1_dbg_fault_packet(struct hfi1_packet *packet)
 {
@@ -86,6 +87,11 @@ static inline bool hfi1_dbg_fault_opcode(struct rvt_qp *qp,
 {
 	return false;
 }
+
+static inline bool hfi1_dbg_fault_suppress_err(struct hfi1_ibdev *ibd)
+{
+	return false;
+}
 #endif
 
 #else
diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c
index c0b012f..64bdbce 100644
--- a/drivers/infiniband/hw/hfi1/driver.c
+++ b/drivers/infiniband/hw/hfi1/driver.c
@@ -1367,6 +1367,11 @@ int process_receive_ib(struct hfi1_packet *packet)
 			  packet->updegr,
 			  rhf_egr_index(packet->rhf));
 
+	if (unlikely(
+		 (hfi1_dbg_fault_suppress_err(&packet->rcd->dd->verbs_dev) &&
+		 (packet->rhf & RHF_DC_ERR))))
+		return RHF_RCV_CONTINUE;
+
 	if (unlikely(rhf_err_flags(packet->rhf))) {
 		handle_eflags(packet);
 		return RHF_RCV_CONTINUE;
@@ -1402,6 +1407,12 @@ int process_receive_bypass(struct hfi1_packet *packet)
 
 int process_receive_error(struct hfi1_packet *packet)
 {
+	/* KHdrHCRCErr -- KDETH packet with a bad HCRC */
+	if (unlikely(
+		 hfi1_dbg_fault_suppress_err(&packet->rcd->dd->verbs_dev) &&
+		 rhf_rcv_type_err(packet->rhf) == 3))
+		return RHF_RCV_CONTINUE;
+
 	handle_eflags(packet);
 
 	if (unlikely(rhf_err_flags(packet->rhf)))
diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c
index 2f98c8e..b8b9819 100644
--- a/drivers/infiniband/hw/hfi1/verbs.c
+++ b/drivers/infiniband/hw/hfi1/verbs.c
@@ -502,6 +502,35 @@ static inline opcode_handler qp_ok(int opcode, struct hfi1_packet *packet)
 	return NULL;
 }
 
+static u64 hfi1_fault_tx(struct rvt_qp *qp, u8 opcode, u64 pbc)
+{
+#ifdef CONFIG_HFI1_FAULT_INJECTION
+	if ((opcode & IB_OPCODE_MSP) == IB_OPCODE_MSP)
+		/*
+		 * In order to drop non-IB traffic we
+		 * set PbcInsertHrc to NONE (0x2).
+		 * The packet will still be delivered
+		 * to the receiving node but a
+		 * KHdrHCRCErr (KDETH packet with a bad
+		 * HCRC) will be triggered and the
+		 * packet will not be delivered to the
+		 * correct context.
+		 */
+		pbc |= (u64)PBC_IHCRC_NONE << PBC_INSERT_HCRC_SHIFT;
+	else
+		/*
+		 * In order to drop regular verbs
+		 * traffic we set the PbcTestEbp
+		 * flag. The packet will still be
+		 * delivered to the receiving node but
+		 * a 'late ebp error' will be
+		 * triggered and will be dropped.
+		 */
+		pbc |= PBC_TEST_EBP;
+#endif
+	return pbc;
+}
+
 /**
  * hfi1_ib_rcv - process an incoming packet
  * @packet: data packet information
@@ -787,7 +816,6 @@ static int build_verbs_tx_desc(
 		if (ret)
 			goto bail_txadd;
 	}
-
 	/* add the ulp payload - if any. tx->ss can be NULL for acks */
 	if (tx->ss)
 		ret = build_verbs_ulp_payload(sde, length, tx);
@@ -806,7 +834,6 @@ int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
 	struct hfi1_ibdev *dev = ps->dev;
 	struct hfi1_pportdata *ppd = ps->ppd;
 	struct verbs_txreq *tx;
-	u64 pbc_flags = 0;
 	u8 sc5 = priv->s_sc;
 
 	int ret;
@@ -815,12 +842,16 @@ int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
 	if (!sdma_txreq_built(&tx->txreq)) {
 		if (likely(pbc == 0)) {
 			u32 vl = sc_to_vlt(dd_from_ibdev(qp->ibqp.device), sc5);
+			u8 opcode = get_opcode(&tx->phdr.hdr);
+
 			/* No vl15 here */
 			/* set PBC_DC_INFO bit (aka SC[4]) in pbc_flags */
-			pbc_flags |= (!!(sc5 & 0x10)) << PBC_DC_INFO_SHIFT;
+			pbc |= (!!(sc5 & 0x10)) << PBC_DC_INFO_SHIFT;
 
+			if (unlikely(hfi1_dbg_fault_opcode(qp, opcode, false)))
+				pbc = hfi1_fault_tx(qp, opcode, pbc);
 			pbc = create_pbc(ppd,
-					 pbc_flags,
+					 pbc,
 					 qp->srate_mbps,
 					 vl,
 					 plen);
@@ -923,7 +954,6 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
 	u32 plen = hdrwords + dwords + 2; /* includes pbc */
 	struct hfi1_pportdata *ppd = ps->ppd;
 	u32 *hdr = (u32 *)&ps->s_txreq->phdr.hdr;
-	u64 pbc_flags = 0;
 	u8 sc5;
 	unsigned long flags = 0;
 	struct send_context *sc;
@@ -948,9 +978,14 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
 
 	if (likely(pbc == 0)) {
 		u8 vl = sc_to_vlt(dd_from_ibdev(qp->ibqp.device), sc5);
+		struct verbs_txreq *tx = ps->s_txreq;
+		u8 opcode = get_opcode(&tx->phdr.hdr);
+
 		/* set PBC_DC_INFO bit (aka SC[4]) in pbc_flags */
-		pbc_flags |= (!!(sc5 & 0x10)) << PBC_DC_INFO_SHIFT;
-		pbc = create_pbc(ppd, pbc_flags, qp->srate_mbps, vl, plen);
+		pbc |= (!!(sc5 & 0x10)) << PBC_DC_INFO_SHIFT;
+		if (unlikely(hfi1_dbg_fault_opcode(qp, opcode, false)))
+			pbc = hfi1_fault_tx(qp, opcode, pbc);
+		pbc = create_pbc(ppd, pbc, qp->srate_mbps, vl, plen);
 	}
 	if (cb)
 		iowait_pio_inc(&priv->s_iowait);
diff --git a/drivers/infiniband/hw/hfi1/verbs.h b/drivers/infiniband/hw/hfi1/verbs.h
index 4b7fc2d..667867f 100644
--- a/drivers/infiniband/hw/hfi1/verbs.h
+++ b/drivers/infiniband/hw/hfi1/verbs.h
@@ -198,6 +198,7 @@ struct hfi1_ibdev {
 #ifdef CONFIG_HFI1_FAULT_INJECTION
 	struct fault_opcode *fault_opcode;
 	struct fault_packet *fault_packet;
+	bool fault_suppress_err;
 #endif
 #endif
 };
diff --git a/include/rdma/ib_pack.h b/include/rdma/ib_pack.h
index b13419c..3665589 100644
--- a/include/rdma/ib_pack.h
+++ b/include/rdma/ib_pack.h
@@ -80,6 +80,8 @@ enum {
 	IB_OPCODE_UD                                = 0x60,
 	/* per IBTA 1.3 vol 1 Table 38, A10.3.2 */
 	IB_OPCODE_CNP                               = 0x80,
+	/* Manufacturer specific */
+	IB_OPCODE_MSP                               = 0xe0,
 
 	/* operations -- just used to define real constants */
 	IB_OPCODE_SEND_FIRST                        = 0x00,

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

  parent reply	other threads:[~2017-02-08 13:28 UTC|newest]

Thread overview: 35+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-02-08 13:25 [PATCH 00/27] IB/hfi1,qib,rdmavt: Patches for 4.11 Dennis Dalessandro
     [not found] ` <20170208132142.16442.69329.stgit-9QXIwq+3FY+1XWohqUldA0EOCMrvLtNR@public.gmane.org>
2017-02-08 13:25   ` [PATCH 01/27] IB/hfi1: Correct defered count after processing qp_wait_list Dennis Dalessandro
2017-02-08 13:26   ` [PATCH 02/27] IB/hfi1: Process qp wait list in IRQ thread periodically Dennis Dalessandro
2017-02-08 13:26   ` [PATCH 03/27] IB/hfi1: Ensure read of producer s_head is correct Dennis Dalessandro
2017-02-08 13:26   ` [PATCH 04/27] IB/hfi1: Use static CTLE with Preset 6 for integrated HFIs Dennis Dalessandro
2017-02-08 13:26   ` [PATCH 05/27] IB/hfi1: Correct error calldown locking Dennis Dalessandro
2017-02-08 13:26   ` [PATCH 06/27] IB/hfi1: Access hfi1_ibport through rcd pointer Dennis Dalessandro
2017-02-08 13:26   ` [PATCH 07/27] IB/rdmavt: Use per-CPU reference count for MRs Dennis Dalessandro
2017-02-08 13:26   ` [PATCH 08/27] IB/hfi1: Allocate context data on memory node Dennis Dalessandro
2017-02-08 13:26   ` [PATCH 09/27] IB/hfi1: Add additional fields to qp_stats Dennis Dalessandro
2017-02-08 13:26   ` [PATCH 10/27] IB/hfi1: Reduce oversized fields in struct hfi1_packet Dennis Dalessandro
2017-02-08 13:26   ` [PATCH 11/27] IB/hfi1: Check upper-case EFI variables Dennis Dalessandro
2017-02-08 13:27   ` [PATCH 12/27] IB/hfi1, qib, rdmavt: Move two IB event functions into rdmavt Dennis Dalessandro
2017-02-08 13:27   ` [PATCH 13/27] IB/hfi1, qib, rdmavt: Move AETH credit " Dennis Dalessandro
2017-02-08 13:27   ` [PATCH 14/27] IB/rdmavt: Adding timer logic to rdmavt Dennis Dalessandro
     [not found]     ` <20170208132712.16442.57028.stgit-9QXIwq+3FY+1XWohqUldA0EOCMrvLtNR@public.gmane.org>
2017-02-12 17:33       ` Leon Romanovsky
2017-02-08 13:27   ` [PATCH 15/27] IB/hfi1: Use new rdmavt timers Dennis Dalessandro
2017-02-08 13:27   ` [PATCH 16/27] IB/qib: " Dennis Dalessandro
2017-02-08 13:27   ` [PATCH 17/27] IB/hfi1, rdmavt: Update copy_sge to use boolean arguments Dennis Dalessandro
2017-02-08 13:27   ` [PATCH 18/27] IB/hfi1, rdmavt: Move SGE state helper routines into rdmavt Dennis Dalessandro
2017-02-08 13:27   ` [PATCH 19/27] IB/qib: Updates to use rdmavt's SGE helper routines Dennis Dalessandro
2017-02-08 13:27   ` [PATCH 20/27] IB/rdmavt, IB/hfi1, IB/qib: Correct ack count for passive (RTR) QPs Dennis Dalessandro
2017-02-08 13:27   ` [PATCH 21/27] IB/hfi1: Modify logging frequency of DCC errors Dennis Dalessandro
2017-02-08 13:28   ` [PATCH 22/27] IB/hfi1: Add receive fault injection feature Dennis Dalessandro
     [not found]     ` <20170208132800.16442.94549.stgit-9QXIwq+3FY+1XWohqUldA0EOCMrvLtNR@public.gmane.org>
2017-02-12 17:42       ` Leon Romanovsky
     [not found]         ` <20170212174205.GI14015-U/DQcQFIOTAAJjI8aNfphQ@public.gmane.org>
2017-02-14 21:51           ` Doug Ledford
     [not found]             ` <1487109065.86943.86.camel-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
2017-02-28 17:55               ` Dennis Dalessandro
2017-02-08 13:28   ` Dennis Dalessandro [this message]
2017-02-08 13:28   ` [PATCH 24/27] IB/hfi1: Do not set physical link state if DC is in the shutdown state Dennis Dalessandro
2017-02-08 13:28   ` [PATCH 25/27] IB/hfi1: Add rvt_rnr_tbl_to_usec function Dennis Dalessandro
     [not found]     ` <20170208132818.16442.38634.stgit-9QXIwq+3FY+1XWohqUldA0EOCMrvLtNR@public.gmane.org>
2017-02-12 17:48       ` Leon Romanovsky
2017-02-08 13:28   ` [PATCH 26/27] IB/hfi1, qib, rdmavt: Move AETH defines to rdma/ib_hdrs.h Dennis Dalessandro
     [not found]     ` <20170208132824.16442.61753.stgit-9QXIwq+3FY+1XWohqUldA0EOCMrvLtNR@public.gmane.org>
2017-02-12 17:47       ` Leon Romanovsky
2017-02-08 13:28   ` [PATCH 27/27] IB/hfi1: Code reuse with memdup_copy Dennis Dalessandro
2017-02-19 13:47   ` [PATCH 00/27] IB/hfi1,qib,rdmavt: Patches for 4.11 Doug Ledford

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20170208132806.16442.16539.stgit@scvm10.sc.intel.com \
    --to=dennis.dalessandro-ral2jqcrhueavxtiumwx3w@public.gmane.org \
    --cc=dledford-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org \
    --cc=don.hiatt-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org \
    --cc=linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org \
    --cc=mike.marciniszyn-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.