All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/2] ehea: error handling improvement
@ 2010-04-19 12:08 ` Thomas Klein
  0 siblings, 0 replies; 4+ messages in thread
From: Thomas Klein @ 2010-04-19 12:08 UTC (permalink / raw)
  To: David S. Miller
  Cc: Christoph Raisch, Jan-Bernd Themann, linux-kernel, linux-ppc, netdev

Reset a port's resources only if they're actually in an error state

Signed-off-by: Thomas Klein <tklein@de.ibm.com>
---

Patch created against 2.6.34-rc4

diff -Nurp linux-2.6.34-rc4.orig//drivers/net/ehea/ehea_main.c linux-2.6.34-rc4//drivers/net/ehea/ehea_main.c
--- linux-2.6.34-rc4.orig//drivers/net/ehea/ehea_main.c	2010-04-19 11:54:07.000000000 +0200
+++ linux-2.6.34-rc4//drivers/net/ehea/ehea_main.c	2010-04-19 11:55:43.000000000 +0200
@@ -791,11 +791,17 @@ static struct ehea_cqe *ehea_proc_cqes(s
  		cqe_counter++;
  		rmb();
  		if (cqe->status & EHEA_CQE_STAT_ERR_MASK) {
-			ehea_error("Send Completion Error: Resetting port");
+			ehea_error("Bad send completion status=0x%04X",
+				   cqe->status);
+
  			if (netif_msg_tx_err(pr->port))
  				ehea_dump(cqe, sizeof(*cqe), "Send CQE");
-			ehea_schedule_port_reset(pr->port);
-			break;
+
+			if (cqe->status & EHEA_CQE_STAT_RESET_MASK) {
+				ehea_error("Resetting port");
+				ehea_schedule_port_reset(pr->port);
+				break;
+			}
  		}

  		if (netif_msg_tx_done(pr->port))
@@ -901,6 +907,8 @@ static irqreturn_t ehea_qp_aff_irq_handl
  	struct ehea_eqe *eqe;
  	struct ehea_qp *qp;
  	u32 qp_token;
+	u64 resource_type, aer, aerr;
+	int reset_port = 0;

  	eqe = ehea_poll_eq(port->qp_eq);

@@ -910,11 +918,24 @@ static irqreturn_t ehea_qp_aff_irq_handl
  			   eqe->entry, qp_token);

  		qp = port->port_res[qp_token].qp;
-		ehea_error_data(port->adapter, qp->fw_handle);
+
+		resource_type = ehea_error_data(port->adapter, qp->fw_handle,
+						&aer, &aerr);
+
+		if (resource_type == EHEA_AER_RESTYPE_QP) {
+			if ((aer & EHEA_AER_RESET_MASK) ||
+			    (aerr & EHEA_AERR_RESET_MASK))
+				 reset_port = 1;
+		} else
+			reset_port = 1;   /* Reset in case of CQ or EQ error */
+
  		eqe = ehea_poll_eq(port->qp_eq);
  	}

-	ehea_schedule_port_reset(port);
+	if (reset_port) {
+		ehea_error("Resetting port");
+		ehea_schedule_port_reset(port);
+	}

  	return IRQ_HANDLED;
  }
diff -Nurp linux-2.6.34-rc4.orig//drivers/net/ehea/ehea_qmr.c linux-2.6.34-rc4//drivers/net/ehea/ehea_qmr.c
--- linux-2.6.34-rc4.orig//drivers/net/ehea/ehea_qmr.c	2010-04-19 11:54:07.000000000 +0200
+++ linux-2.6.34-rc4//drivers/net/ehea/ehea_qmr.c	2010-04-19 11:56:36.000000000 +0200
@@ -229,14 +229,14 @@ u64 ehea_destroy_cq_res(struct ehea_cq *

  int ehea_destroy_cq(struct ehea_cq *cq)
  {
-	u64 hret;
+	u64 hret, aer, aerr;
  	if (!cq)
  		return 0;

  	hcp_epas_dtor(&cq->epas);
  	hret = ehea_destroy_cq_res(cq, NORMAL_FREE);
  	if (hret == H_R_STATE) {
-		ehea_error_data(cq->adapter, cq->fw_handle);
+		ehea_error_data(cq->adapter, cq->fw_handle, &aer, &aerr);
  		hret = ehea_destroy_cq_res(cq, FORCE_FREE);
  	}

@@ -357,7 +357,7 @@ u64 ehea_destroy_eq_res(struct ehea_eq *

  int ehea_destroy_eq(struct ehea_eq *eq)
  {
-	u64 hret;
+	u64 hret, aer, aerr;
  	if (!eq)
  		return 0;

@@ -365,7 +365,7 @@ int ehea_destroy_eq(struct ehea_eq *eq)

  	hret = ehea_destroy_eq_res(eq, NORMAL_FREE);
  	if (hret == H_R_STATE) {
-		ehea_error_data(eq->adapter, eq->fw_handle);
+		ehea_error_data(eq->adapter, eq->fw_handle, &aer, &aerr);
  		hret = ehea_destroy_eq_res(eq, FORCE_FREE);
  	}

@@ -540,7 +540,7 @@ u64 ehea_destroy_qp_res(struct ehea_qp *

  int ehea_destroy_qp(struct ehea_qp *qp)
  {
-	u64 hret;
+	u64 hret, aer, aerr;
  	if (!qp)
  		return 0;

@@ -548,7 +548,7 @@ int ehea_destroy_qp(struct ehea_qp *qp)

  	hret = ehea_destroy_qp_res(qp, NORMAL_FREE);
  	if (hret == H_R_STATE) {
-		ehea_error_data(qp->adapter, qp->fw_handle);
+		ehea_error_data(qp->adapter, qp->fw_handle, &aer, &aerr);
  		hret = ehea_destroy_qp_res(qp, FORCE_FREE);
  	}

@@ -986,42 +986,45 @@ void print_error_data(u64 *data)
  	if (length > EHEA_PAGESIZE)
  		length = EHEA_PAGESIZE;

-	if (type == 0x8) /* Queue Pair */
+	if (type == EHEA_AER_RESTYPE_QP)
  		ehea_error("QP (resource=%llX) state: AER=0x%llX, AERR=0x%llX, "
  			   "port=%llX", resource, data[6], data[12], data[22]);
-
-	if (type == 0x4) /* Completion Queue */
+	else if (type == EHEA_AER_RESTYPE_CQ)
  		ehea_error("CQ (resource=%llX) state: AER=0x%llX", resource,
  			   data[6]);
-
-	if (type == 0x3) /* Event Queue */
+	else if (type == EHEA_AER_RESTYPE_EQ)
  		ehea_error("EQ (resource=%llX) state: AER=0x%llX", resource,
  			   data[6]);

  	ehea_dump(data, length, "error data");
  }

-void ehea_error_data(struct ehea_adapter *adapter, u64 res_handle)
+u64 ehea_error_data(struct ehea_adapter *adapter, u64 res_handle,
+		    u64 *aer, u64 *aerr)
  {
  	unsigned long ret;
  	u64 *rblock;
+	u64 type = 0;

  	rblock = (void *)get_zeroed_page(GFP_KERNEL);
  	if (!rblock) {
  		ehea_error("Cannot allocate rblock memory.");
-		return;
+		goto out;
  	}

-	ret = ehea_h_error_data(adapter->handle,
-				res_handle,
-				rblock);
+	ret = ehea_h_error_data(adapter->handle, res_handle, rblock);

-	if (ret == H_R_STATE)
-		ehea_error("No error data is available: %llX.", res_handle);
-	else if (ret == H_SUCCESS)
+	if (ret == H_SUCCESS) {
+		type = EHEA_BMASK_GET(ERROR_DATA_TYPE, rblock[2]);
+		*aer = rblock[6];
+		*aerr = rblock[12];
  		print_error_data(rblock);
-	else
+	} else if (ret == H_R_STATE) {
+		ehea_error("No error data available: %llX.", res_handle);
+	} else
  		ehea_error("Error data could not be fetched: %llX", res_handle);

  	free_page((unsigned long)rblock);
+out:
+	return type;
  }
diff -Nurp linux-2.6.34-rc4.orig//drivers/net/ehea/ehea_qmr.h linux-2.6.34-rc4//drivers/net/ehea/ehea_qmr.h
--- linux-2.6.34-rc4.orig//drivers/net/ehea/ehea_qmr.h	2010-04-19 11:54:07.000000000 +0200
+++ linux-2.6.34-rc4//drivers/net/ehea/ehea_qmr.h	2010-04-19 11:57:12.000000000 +0200
@@ -154,6 +154,9 @@ struct ehea_rwqe {
  #define EHEA_CQE_STAT_ERR_IP       0x2000
  #define EHEA_CQE_STAT_ERR_CRC      0x1000

+/* Defines which bad send cqe stati lead to a port reset */
+#define EHEA_CQE_STAT_RESET_MASK   0x0002
+
  struct ehea_cqe {
  	u64 wr_id;		/* work request ID from WQE */
  	u8 type;
@@ -187,6 +190,14 @@ struct ehea_cqe {
  #define EHEA_EQE_SM_MECH_NUMBER  EHEA_BMASK_IBM(48, 55)
  #define EHEA_EQE_SM_PORT_NUMBER  EHEA_BMASK_IBM(56, 63)

+#define EHEA_AER_RESTYPE_QP  0x8
+#define EHEA_AER_RESTYPE_CQ  0x4
+#define EHEA_AER_RESTYPE_EQ  0x3
+
+/* Defines which affiliated errors lead to a port reset */
+#define EHEA_AER_RESET_MASK   0xFFFFFFFFFEFFFFFFULL
+#define EHEA_AERR_RESET_MASK  0xFFFFFFFFFFFFFFFFULL
+
  struct ehea_eqe {
  	u64 entry;
  };
@@ -379,7 +390,8 @@ int ehea_gen_smr(struct ehea_adapter *ad

  int ehea_rem_mr(struct ehea_mr *mr);

-void ehea_error_data(struct ehea_adapter *adapter, u64 res_handle);
+u64 ehea_error_data(struct ehea_adapter *adapter, u64 res_handle,
+		    u64 *aer, u64 *aerr);

  int ehea_add_sect_bmap(unsigned long pfn, unsigned long nr_pages);
  int ehea_rem_sect_bmap(unsigned long pfn, unsigned long nr_pages);

^ permalink raw reply	[flat|nested] 4+ messages in thread

* [PATCH 1/2] ehea: error handling improvement
@ 2010-04-19 12:08 ` Thomas Klein
  0 siblings, 0 replies; 4+ messages in thread
From: Thomas Klein @ 2010-04-19 12:08 UTC (permalink / raw)
  To: David S. Miller
  Cc: linux-ppc, netdev, Christoph Raisch, linux-kernel, Jan-Bernd Themann

Reset a port's resources only if they're actually in an error state

Signed-off-by: Thomas Klein <tklein@de.ibm.com>
---

Patch created against 2.6.34-rc4

diff -Nurp linux-2.6.34-rc4.orig//drivers/net/ehea/ehea_main.c linux-2.6.34-rc4//drivers/net/ehea/ehea_main.c
--- linux-2.6.34-rc4.orig//drivers/net/ehea/ehea_main.c	2010-04-19 11:54:07.000000000 +0200
+++ linux-2.6.34-rc4//drivers/net/ehea/ehea_main.c	2010-04-19 11:55:43.000000000 +0200
@@ -791,11 +791,17 @@ static struct ehea_cqe *ehea_proc_cqes(s
  		cqe_counter++;
  		rmb();
  		if (cqe->status & EHEA_CQE_STAT_ERR_MASK) {
-			ehea_error("Send Completion Error: Resetting port");
+			ehea_error("Bad send completion status=0x%04X",
+				   cqe->status);
+
  			if (netif_msg_tx_err(pr->port))
  				ehea_dump(cqe, sizeof(*cqe), "Send CQE");
-			ehea_schedule_port_reset(pr->port);
-			break;
+
+			if (cqe->status & EHEA_CQE_STAT_RESET_MASK) {
+				ehea_error("Resetting port");
+				ehea_schedule_port_reset(pr->port);
+				break;
+			}
  		}

  		if (netif_msg_tx_done(pr->port))
@@ -901,6 +907,8 @@ static irqreturn_t ehea_qp_aff_irq_handl
  	struct ehea_eqe *eqe;
  	struct ehea_qp *qp;
  	u32 qp_token;
+	u64 resource_type, aer, aerr;
+	int reset_port = 0;

  	eqe = ehea_poll_eq(port->qp_eq);

@@ -910,11 +918,24 @@ static irqreturn_t ehea_qp_aff_irq_handl
  			   eqe->entry, qp_token);

  		qp = port->port_res[qp_token].qp;
-		ehea_error_data(port->adapter, qp->fw_handle);
+
+		resource_type = ehea_error_data(port->adapter, qp->fw_handle,
+						&aer, &aerr);
+
+		if (resource_type == EHEA_AER_RESTYPE_QP) {
+			if ((aer & EHEA_AER_RESET_MASK) ||
+			    (aerr & EHEA_AERR_RESET_MASK))
+				 reset_port = 1;
+		} else
+			reset_port = 1;   /* Reset in case of CQ or EQ error */
+
  		eqe = ehea_poll_eq(port->qp_eq);
  	}

-	ehea_schedule_port_reset(port);
+	if (reset_port) {
+		ehea_error("Resetting port");
+		ehea_schedule_port_reset(port);
+	}

  	return IRQ_HANDLED;
  }
diff -Nurp linux-2.6.34-rc4.orig//drivers/net/ehea/ehea_qmr.c linux-2.6.34-rc4//drivers/net/ehea/ehea_qmr.c
--- linux-2.6.34-rc4.orig//drivers/net/ehea/ehea_qmr.c	2010-04-19 11:54:07.000000000 +0200
+++ linux-2.6.34-rc4//drivers/net/ehea/ehea_qmr.c	2010-04-19 11:56:36.000000000 +0200
@@ -229,14 +229,14 @@ u64 ehea_destroy_cq_res(struct ehea_cq *

  int ehea_destroy_cq(struct ehea_cq *cq)
  {
-	u64 hret;
+	u64 hret, aer, aerr;
  	if (!cq)
  		return 0;

  	hcp_epas_dtor(&cq->epas);
  	hret = ehea_destroy_cq_res(cq, NORMAL_FREE);
  	if (hret == H_R_STATE) {
-		ehea_error_data(cq->adapter, cq->fw_handle);
+		ehea_error_data(cq->adapter, cq->fw_handle, &aer, &aerr);
  		hret = ehea_destroy_cq_res(cq, FORCE_FREE);
  	}

@@ -357,7 +357,7 @@ u64 ehea_destroy_eq_res(struct ehea_eq *

  int ehea_destroy_eq(struct ehea_eq *eq)
  {
-	u64 hret;
+	u64 hret, aer, aerr;
  	if (!eq)
  		return 0;

@@ -365,7 +365,7 @@ int ehea_destroy_eq(struct ehea_eq *eq)

  	hret = ehea_destroy_eq_res(eq, NORMAL_FREE);
  	if (hret == H_R_STATE) {
-		ehea_error_data(eq->adapter, eq->fw_handle);
+		ehea_error_data(eq->adapter, eq->fw_handle, &aer, &aerr);
  		hret = ehea_destroy_eq_res(eq, FORCE_FREE);
  	}

@@ -540,7 +540,7 @@ u64 ehea_destroy_qp_res(struct ehea_qp *

  int ehea_destroy_qp(struct ehea_qp *qp)
  {
-	u64 hret;
+	u64 hret, aer, aerr;
  	if (!qp)
  		return 0;

@@ -548,7 +548,7 @@ int ehea_destroy_qp(struct ehea_qp *qp)

  	hret = ehea_destroy_qp_res(qp, NORMAL_FREE);
  	if (hret == H_R_STATE) {
-		ehea_error_data(qp->adapter, qp->fw_handle);
+		ehea_error_data(qp->adapter, qp->fw_handle, &aer, &aerr);
  		hret = ehea_destroy_qp_res(qp, FORCE_FREE);
  	}

@@ -986,42 +986,45 @@ void print_error_data(u64 *data)
  	if (length > EHEA_PAGESIZE)
  		length = EHEA_PAGESIZE;

-	if (type == 0x8) /* Queue Pair */
+	if (type == EHEA_AER_RESTYPE_QP)
  		ehea_error("QP (resource=%llX) state: AER=0x%llX, AERR=0x%llX, "
  			   "port=%llX", resource, data[6], data[12], data[22]);
-
-	if (type == 0x4) /* Completion Queue */
+	else if (type == EHEA_AER_RESTYPE_CQ)
  		ehea_error("CQ (resource=%llX) state: AER=0x%llX", resource,
  			   data[6]);
-
-	if (type == 0x3) /* Event Queue */
+	else if (type == EHEA_AER_RESTYPE_EQ)
  		ehea_error("EQ (resource=%llX) state: AER=0x%llX", resource,
  			   data[6]);

  	ehea_dump(data, length, "error data");
  }

-void ehea_error_data(struct ehea_adapter *adapter, u64 res_handle)
+u64 ehea_error_data(struct ehea_adapter *adapter, u64 res_handle,
+		    u64 *aer, u64 *aerr)
  {
  	unsigned long ret;
  	u64 *rblock;
+	u64 type = 0;

  	rblock = (void *)get_zeroed_page(GFP_KERNEL);
  	if (!rblock) {
  		ehea_error("Cannot allocate rblock memory.");
-		return;
+		goto out;
  	}

-	ret = ehea_h_error_data(adapter->handle,
-				res_handle,
-				rblock);
+	ret = ehea_h_error_data(adapter->handle, res_handle, rblock);

-	if (ret == H_R_STATE)
-		ehea_error("No error data is available: %llX.", res_handle);
-	else if (ret == H_SUCCESS)
+	if (ret == H_SUCCESS) {
+		type = EHEA_BMASK_GET(ERROR_DATA_TYPE, rblock[2]);
+		*aer = rblock[6];
+		*aerr = rblock[12];
  		print_error_data(rblock);
-	else
+	} else if (ret == H_R_STATE) {
+		ehea_error("No error data available: %llX.", res_handle);
+	} else
  		ehea_error("Error data could not be fetched: %llX", res_handle);

  	free_page((unsigned long)rblock);
+out:
+	return type;
  }
diff -Nurp linux-2.6.34-rc4.orig//drivers/net/ehea/ehea_qmr.h linux-2.6.34-rc4//drivers/net/ehea/ehea_qmr.h
--- linux-2.6.34-rc4.orig//drivers/net/ehea/ehea_qmr.h	2010-04-19 11:54:07.000000000 +0200
+++ linux-2.6.34-rc4//drivers/net/ehea/ehea_qmr.h	2010-04-19 11:57:12.000000000 +0200
@@ -154,6 +154,9 @@ struct ehea_rwqe {
  #define EHEA_CQE_STAT_ERR_IP       0x2000
  #define EHEA_CQE_STAT_ERR_CRC      0x1000

+/* Defines which bad send cqe stati lead to a port reset */
+#define EHEA_CQE_STAT_RESET_MASK   0x0002
+
  struct ehea_cqe {
  	u64 wr_id;		/* work request ID from WQE */
  	u8 type;
@@ -187,6 +190,14 @@ struct ehea_cqe {
  #define EHEA_EQE_SM_MECH_NUMBER  EHEA_BMASK_IBM(48, 55)
  #define EHEA_EQE_SM_PORT_NUMBER  EHEA_BMASK_IBM(56, 63)

+#define EHEA_AER_RESTYPE_QP  0x8
+#define EHEA_AER_RESTYPE_CQ  0x4
+#define EHEA_AER_RESTYPE_EQ  0x3
+
+/* Defines which affiliated errors lead to a port reset */
+#define EHEA_AER_RESET_MASK   0xFFFFFFFFFEFFFFFFULL
+#define EHEA_AERR_RESET_MASK  0xFFFFFFFFFFFFFFFFULL
+
  struct ehea_eqe {
  	u64 entry;
  };
@@ -379,7 +390,8 @@ int ehea_gen_smr(struct ehea_adapter *ad

  int ehea_rem_mr(struct ehea_mr *mr);

-void ehea_error_data(struct ehea_adapter *adapter, u64 res_handle);
+u64 ehea_error_data(struct ehea_adapter *adapter, u64 res_handle,
+		    u64 *aer, u64 *aerr);

  int ehea_add_sect_bmap(unsigned long pfn, unsigned long nr_pages);
  int ehea_rem_sect_bmap(unsigned long pfn, unsigned long nr_pages);

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH 1/2] ehea: error handling improvement
  2010-04-19 12:08 ` Thomas Klein
@ 2010-04-21  2:16   ` David Miller
  -1 siblings, 0 replies; 4+ messages in thread
From: David Miller @ 2010-04-21  2:16 UTC (permalink / raw)
  To: osstklei; +Cc: raisch, themann, linux-kernel, linuxppc-dev, netdev

From: Thomas Klein <osstklei@de.ibm.com>
Date: Mon, 19 Apr 2010 14:08:11 +0200

> Reset a port's resources only if they're actually in an error state
> 
> Signed-off-by: Thomas Klein <tklein@de.ibm.com>
> ---
> 
> Patch created against 2.6.34-rc4

There are several problems with these patches:

1) They are corrupted by your email client, lines unchanged
   begin with one space character instead of two.  Therefore
   even 'patch' wouldn't accept these changes.

2) The double slash in the patch file paths makes git not
   accept the change.  Please don't put double-slashes in
   your patch paths as that canonically means "/".

3) These are not appropriate for net-2.6 as we are deep in
   the -rcX series at this point and only the most diabolical
   bug fixes are appropriate.  Therefore, please generate these
   against net-next-2.6, thanks.

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH 1/2] ehea: error handling improvement
@ 2010-04-21  2:16   ` David Miller
  0 siblings, 0 replies; 4+ messages in thread
From: David Miller @ 2010-04-21  2:16 UTC (permalink / raw)
  To: osstklei; +Cc: linuxppc-dev, netdev, raisch, linux-kernel, themann

From: Thomas Klein <osstklei@de.ibm.com>
Date: Mon, 19 Apr 2010 14:08:11 +0200

> Reset a port's resources only if they're actually in an error state
> 
> Signed-off-by: Thomas Klein <tklein@de.ibm.com>
> ---
> 
> Patch created against 2.6.34-rc4

There are several problems with these patches:

1) They are corrupted by your email client, lines unchanged
   begin with one space character instead of two.  Therefore
   even 'patch' wouldn't accept these changes.

2) The double slash in the patch file paths makes git not
   accept the change.  Please don't put double-slashes in
   your patch paths as that canonically means "/".

3) These are not appropriate for net-2.6 as we are deep in
   the -rcX series at this point and only the most diabolical
   bug fixes are appropriate.  Therefore, please generate these
   against net-next-2.6, thanks.

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2010-04-21  2:15 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-04-19 12:08 [PATCH 1/2] ehea: error handling improvement Thomas Klein
2010-04-19 12:08 ` Thomas Klein
2010-04-21  2:16 ` David Miller
2010-04-21  2:16   ` David Miller

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.