linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] IB/ehca: Serialize HCA-related hCalls on POWER5
@ 2007-12-06 15:07 Joachim Fenkes
  2007-12-06 15:48 ` Arnd Bergmann
  0 siblings, 1 reply; 19+ messages in thread
From: Joachim Fenkes @ 2007-12-06 15:07 UTC (permalink / raw)
  To: LinuxPPC-Dev, LKML, OF-General, Roland Dreier, OF-EWG
  Cc: Hoang-Nam Nguyen, Christoph Raisch, Stefan Roscher, Marcus Eder

All firmware versions on POWER5 systems have a locking issue in the
HCA-related hCalls that can cause loss of Infiniband connectivity if
allocate and free calls happen in parallel. This can happen, for example,
when two processes are using OpenMPI in parallel.
Circumvent this by serializing all HCA-related hCalls on POWER5.

Signed-off-by: Joachim Fenkes <fenkes@de.ibm.com>
---

We tested this patch, especially the autodetection, and it works okay.
Please review and apply for 2.6.24-rc5 - thanks!

 drivers/infiniband/hw/ehca/ehca_main.c |   16 ++++++++++++++++
 drivers/infiniband/hw/ehca/hcp_if.c    |   28 +++++++++++-----------------
 2 files changed, 27 insertions(+), 17 deletions(-)

diff --git a/drivers/infiniband/hw/ehca/ehca_main.c b/drivers/infiniband/hw/ehca/ehca_main.c
index 90d4334..8f33d06 100644
--- a/drivers/infiniband/hw/ehca/ehca_main.c
+++ b/drivers/infiniband/hw/ehca/ehca_main.c
@@ -43,6 +43,9 @@
 #ifdef CONFIG_PPC_64K_PAGES
 #include <linux/slab.h>
 #endif
+
+#include <asm/cputable.h>
+
 #include "ehca_classes.h"
 #include "ehca_iverbs.h"
 #include "ehca_mrmw.h"
@@ -66,6 +69,7 @@ int ehca_poll_all_eqs  = 1;
 int ehca_static_rate   = -1;
 int ehca_scaling_code  = 0;
 int ehca_mr_largepage  = 1;
+int ehca_lock_hcalls   = -1;
 
 module_param_named(open_aqp1,     ehca_open_aqp1,     int, S_IRUGO);
 module_param_named(debug_level,   ehca_debug_level,   int, S_IRUGO);
@@ -77,6 +81,7 @@ module_param_named(poll_all_eqs,  ehca_poll_all_eqs,  int, S_IRUGO);
 module_param_named(static_rate,   ehca_static_rate,   int, S_IRUGO);
 module_param_named(scaling_code,  ehca_scaling_code,  int, S_IRUGO);
 module_param_named(mr_largepage,  ehca_mr_largepage,  int, S_IRUGO);
+module_param_named(lock_hcalls,   ehca_lock_hcalls,   bool, S_IRUGO);
 
 MODULE_PARM_DESC(open_aqp1,
 		 "AQP1 on startup (0: no (default), 1: yes)");
@@ -102,6 +107,9 @@ MODULE_PARM_DESC(scaling_code,
 MODULE_PARM_DESC(mr_largepage,
 		 "use large page for MR (0: use PAGE_SIZE (default), "
 		 "1: use large page depending on MR size");
+MODULE_PARM_DESC(lock_hcalls,
+		 "serialize all hCalls made by the driver "
+		 "(default: autodetect)");
 
 DEFINE_RWLOCK(ehca_qp_idr_lock);
 DEFINE_RWLOCK(ehca_cq_idr_lock);
@@ -924,6 +932,14 @@ int __init ehca_module_init(void)
 	printk(KERN_INFO "eHCA Infiniband Device Driver "
 	       "(Version " HCAD_VERSION ")\n");
 
+	/* Autodetect hCall locking -- we can't read the firmware version
+	 * directly, but we know that starting with POWER6, all firmware
+	 * versions are good.
+	 */
+	if (ehca_lock_hcalls == -1)
+		ehca_lock_hcalls = !(cur_cpu_spec->cpu_user_features
+				     & PPC_FEATURE_ARCH_2_05);
+
 	ret = ehca_create_comp_pool();
 	if (ret) {
 		ehca_gen_err("Cannot create comp pool.");
diff --git a/drivers/infiniband/hw/ehca/hcp_if.c b/drivers/infiniband/hw/ehca/hcp_if.c
index c16a213..331b5e8 100644
--- a/drivers/infiniband/hw/ehca/hcp_if.c
+++ b/drivers/infiniband/hw/ehca/hcp_if.c
@@ -89,6 +89,7 @@
 #define HCALL9_REGS_FORMAT HCALL7_REGS_FORMAT " r11=%lx r12=%lx"
 
 static DEFINE_SPINLOCK(hcall_lock);
+extern int ehca_lock_hcalls;
 
 static u32 get_longbusy_msecs(int longbusy_rc)
 {
@@ -120,26 +121,21 @@ static long ehca_plpar_hcall_norets(unsigned long opcode,
 				    unsigned long arg7)
 {
 	long ret;
-	int i, sleep_msecs, do_lock;
-	unsigned long flags;
+	int i, sleep_msecs;
+	unsigned long flags = 0;
 
 	ehca_gen_dbg("opcode=%lx " HCALL7_REGS_FORMAT,
 		     opcode, arg1, arg2, arg3, arg4, arg5, arg6, arg7);
 
-	/* lock H_FREE_RESOURCE(MR) against itself and H_ALLOC_RESOURCE(MR) */
-	if ((opcode == H_FREE_RESOURCE) && (arg7 == 5)) {
-		arg7 = 0; /* better not upset firmware */
-		do_lock = 1;
-	}
-
 	for (i = 0; i < 5; i++) {
-		if (do_lock)
+		/* serialize hCalls to work around firmware issue */
+		if (ehca_lock_hcalls)
 			spin_lock_irqsave(&hcall_lock, flags);
 
 		ret = plpar_hcall_norets(opcode, arg1, arg2, arg3, arg4,
 					 arg5, arg6, arg7);
 
-		if (do_lock)
+		if (ehca_lock_hcalls)
 			spin_unlock_irqrestore(&hcall_lock, flags);
 
 		if (H_IS_LONG_BUSY(ret)) {
@@ -174,24 +170,22 @@ static long ehca_plpar_hcall9(unsigned long opcode,
 			      unsigned long arg9)
 {
 	long ret;
-	int i, sleep_msecs, do_lock;
+	int i, sleep_msecs;
 	unsigned long flags = 0;
 
 	ehca_gen_dbg("INPUT -- opcode=%lx " HCALL9_REGS_FORMAT, opcode,
 		     arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9);
 
-	/* lock H_ALLOC_RESOURCE(MR) against itself and H_FREE_RESOURCE(MR) */
-	do_lock = ((opcode == H_ALLOC_RESOURCE) && (arg2 == 5));
-
 	for (i = 0; i < 5; i++) {
-		if (do_lock)
+		/* serialize hCalls to work around firmware issue */
+		if (ehca_lock_hcalls)
 			spin_lock_irqsave(&hcall_lock, flags);
 
 		ret = plpar_hcall9(opcode, outs,
 				   arg1, arg2, arg3, arg4, arg5,
 				   arg6, arg7, arg8, arg9);
 
-		if (do_lock)
+		if (ehca_lock_hcalls)
 			spin_unlock_irqrestore(&hcall_lock, flags);
 
 		if (H_IS_LONG_BUSY(ret)) {
@@ -821,7 +815,7 @@ u64 hipz_h_free_resource_mr(const struct ipz_adapter_handle adapter_handle,
 	return ehca_plpar_hcall_norets(H_FREE_RESOURCE,
 				       adapter_handle.handle,    /* r4 */
 				       mr->ipz_mr_handle.handle, /* r5 */
-				       0, 0, 0, 0, 5);
+				       0, 0, 0, 0, 0);
 }
 
 u64 hipz_h_reregister_pmr(const struct ipz_adapter_handle adapter_handle,
-- 
1.5.2



^ permalink raw reply related	[flat|nested] 19+ messages in thread
[parent not found: <OF85E31FAA.DADA6039-ONC12573AA.005439C8-C12573AA.005A132E@LocalDomain>]

end of thread, other threads:[~2007-12-13 22:00 UTC | newest]

Thread overview: 19+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2007-12-06 15:07 [PATCH] IB/ehca: Serialize HCA-related hCalls on POWER5 Joachim Fenkes
2007-12-06 15:48 ` Arnd Bergmann
2007-12-06 18:27   ` Roland Dreier
2007-12-07  9:58     ` Arnd Bergmann
2007-12-09 23:22       ` Roland Dreier
2007-12-10 17:41         ` Joachim Fenkes
2007-12-10 21:47           ` Roland Dreier
2007-12-11  8:38             ` Joachim Fenkes
2007-12-12 12:14               ` [ewg] " Or Gerlitz
2007-12-12 16:02                 ` Christoph Raisch
2007-12-12 19:09                 ` Roland Dreier
2007-12-13  8:30                   ` Or Gerlitz
2007-12-13 19:22                     ` [ofa-general] " Caitlin Bestler
2007-12-13 20:59                       ` Joachim Fenkes
2007-12-13 21:08                         ` Caitlin Bestler
2007-12-13 21:35                           ` Joachim Fenkes
2007-12-13 21:48                           ` [ofa-general] Re: [ewg] Re: [PATCH] IB/ehca: SerializeHCA-related " Sean Hefty
2007-12-07 16:25     ` [PATCH] IB/ehca: Serialize HCA-related " Joachim Fenkes
     [not found] <OF85E31FAA.DADA6039-ONC12573AA.005439C8-C12573AA.005A132E@LocalDomain>
2007-12-10 17:59 ` Joachim Fenkes

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).