linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] IB/ehca: Serialize HCA-related hCalls on POWER5
@ 2007-12-06 15:07 Joachim Fenkes
  2007-12-06 15:48 ` Arnd Bergmann
  0 siblings, 1 reply; 19+ messages in thread
From: Joachim Fenkes @ 2007-12-06 15:07 UTC (permalink / raw)
  To: LinuxPPC-Dev, LKML, OF-General, Roland Dreier, OF-EWG
  Cc: Hoang-Nam Nguyen, Christoph Raisch, Stefan Roscher, Marcus Eder

All firmware versions on POWER5 systems have a locking issue in the
HCA-related hCalls that can cause loss of Infiniband connectivity if
allocate and free calls happen in parallel. This may for example be caused
if two processes are using OpenMPI in parallel.
Circumvent this by serializing all HCA-related hCalls on POWER5.

Signed-off-by: Joachim Fenkes <fenkes@de.ibm.com>
---

We tested this patch, especially the autodetection, and it works okay.
Please review and apply for 2.6.24-rc5 - thanks!

 drivers/infiniband/hw/ehca/ehca_main.c |   16 ++++++++++++++++
 drivers/infiniband/hw/ehca/hcp_if.c    |   28 +++++++++++-----------------
 2 files changed, 27 insertions(+), 17 deletions(-)

diff --git a/drivers/infiniband/hw/ehca/ehca_main.c b/drivers/infiniband/hw/ehca/ehca_main.c
index 90d4334..8f33d06 100644
--- a/drivers/infiniband/hw/ehca/ehca_main.c
+++ b/drivers/infiniband/hw/ehca/ehca_main.c
@@ -43,6 +43,9 @@
 #ifdef CONFIG_PPC_64K_PAGES
 #include <linux/slab.h>
 #endif
+
+#include <asm/cputable.h>
+
 #include "ehca_classes.h"
 #include "ehca_iverbs.h"
 #include "ehca_mrmw.h"
@@ -66,6 +69,7 @@ int ehca_poll_all_eqs  = 1;
 int ehca_static_rate   = -1;
 int ehca_scaling_code  = 0;
 int ehca_mr_largepage  = 1;
+int ehca_lock_hcalls   = -1;
 
 module_param_named(open_aqp1,     ehca_open_aqp1,     int, S_IRUGO);
 module_param_named(debug_level,   ehca_debug_level,   int, S_IRUGO);
@@ -77,6 +81,7 @@ module_param_named(poll_all_eqs,  ehca_poll_all_eqs,  int, S_IRUGO);
 module_param_named(static_rate,   ehca_static_rate,   int, S_IRUGO);
 module_param_named(scaling_code,  ehca_scaling_code,  int, S_IRUGO);
 module_param_named(mr_largepage,  ehca_mr_largepage,  int, S_IRUGO);
+module_param_named(lock_hcalls,   ehca_lock_hcalls,   bool, S_IRUGO);
 
 MODULE_PARM_DESC(open_aqp1,
 		 "AQP1 on startup (0: no (default), 1: yes)");
@@ -102,6 +107,9 @@ MODULE_PARM_DESC(scaling_code,
 MODULE_PARM_DESC(mr_largepage,
 		 "use large page for MR (0: use PAGE_SIZE (default), "
 		 "1: use large page depending on MR size");
+MODULE_PARM_DESC(lock_hcalls,
+		 "serialize all hCalls made by the driver "
+		 "(default: autodetect)");
 
 DEFINE_RWLOCK(ehca_qp_idr_lock);
 DEFINE_RWLOCK(ehca_cq_idr_lock);
@@ -924,6 +932,14 @@ int __init ehca_module_init(void)
 	printk(KERN_INFO "eHCA Infiniband Device Driver "
 	       "(Version " HCAD_VERSION ")\n");
 
+	/* Autodetect hCall locking -- we can't read the firmware version
+	 * directly, but we know that starting with POWER6, all firmware
+	 * versions are good.
+	 */
+	if (ehca_lock_hcalls == -1)
+		ehca_lock_hcalls = !(cur_cpu_spec->cpu_user_features
+				     & PPC_FEATURE_ARCH_2_05);
+
 	ret = ehca_create_comp_pool();
 	if (ret) {
 		ehca_gen_err("Cannot create comp pool.");
diff --git a/drivers/infiniband/hw/ehca/hcp_if.c b/drivers/infiniband/hw/ehca/hcp_if.c
index c16a213..331b5e8 100644
--- a/drivers/infiniband/hw/ehca/hcp_if.c
+++ b/drivers/infiniband/hw/ehca/hcp_if.c
@@ -89,6 +89,7 @@
 #define HCALL9_REGS_FORMAT HCALL7_REGS_FORMAT " r11=%lx r12=%lx"
 
 static DEFINE_SPINLOCK(hcall_lock);
+extern int ehca_lock_hcalls;
 
 static u32 get_longbusy_msecs(int longbusy_rc)
 {
@@ -120,26 +121,21 @@ static long ehca_plpar_hcall_norets(unsigned long opcode,
 				    unsigned long arg7)
 {
 	long ret;
-	int i, sleep_msecs, do_lock;
-	unsigned long flags;
+	int i, sleep_msecs;
+	unsigned long flags = 0;
 
 	ehca_gen_dbg("opcode=%lx " HCALL7_REGS_FORMAT,
 		     opcode, arg1, arg2, arg3, arg4, arg5, arg6, arg7);
 
-	/* lock H_FREE_RESOURCE(MR) against itself and H_ALLOC_RESOURCE(MR) */
-	if ((opcode == H_FREE_RESOURCE) && (arg7 == 5)) {
-		arg7 = 0; /* better not upset firmware */
-		do_lock = 1;
-	}
-
 	for (i = 0; i < 5; i++) {
-		if (do_lock)
+		/* serialize hCalls to work around firmware issue */
+		if (ehca_lock_hcalls)
 			spin_lock_irqsave(&hcall_lock, flags);
 
 		ret = plpar_hcall_norets(opcode, arg1, arg2, arg3, arg4,
 					 arg5, arg6, arg7);
 
-		if (do_lock)
+		if (ehca_lock_hcalls)
 			spin_unlock_irqrestore(&hcall_lock, flags);
 
 		if (H_IS_LONG_BUSY(ret)) {
@@ -174,24 +170,22 @@ static long ehca_plpar_hcall9(unsigned long opcode,
 			      unsigned long arg9)
 {
 	long ret;
-	int i, sleep_msecs, do_lock;
+	int i, sleep_msecs;
 	unsigned long flags = 0;
 
 	ehca_gen_dbg("INPUT -- opcode=%lx " HCALL9_REGS_FORMAT, opcode,
 		     arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9);
 
-	/* lock H_ALLOC_RESOURCE(MR) against itself and H_FREE_RESOURCE(MR) */
-	do_lock = ((opcode == H_ALLOC_RESOURCE) && (arg2 == 5));
-
 	for (i = 0; i < 5; i++) {
-		if (do_lock)
+		/* serialize hCalls to work around firmware issue */
+		if (ehca_lock_hcalls)
 			spin_lock_irqsave(&hcall_lock, flags);
 
 		ret = plpar_hcall9(opcode, outs,
 				   arg1, arg2, arg3, arg4, arg5,
 				   arg6, arg7, arg8, arg9);
 
-		if (do_lock)
+		if (ehca_lock_hcalls)
 			spin_unlock_irqrestore(&hcall_lock, flags);
 
 		if (H_IS_LONG_BUSY(ret)) {
@@ -821,7 +815,7 @@ u64 hipz_h_free_resource_mr(const struct ipz_adapter_handle adapter_handle,
 	return ehca_plpar_hcall_norets(H_FREE_RESOURCE,
 				       adapter_handle.handle,    /* r4 */
 				       mr->ipz_mr_handle.handle, /* r5 */
-				       0, 0, 0, 0, 5);
+				       0, 0, 0, 0, 0);
 }
 
 u64 hipz_h_reregister_pmr(const struct ipz_adapter_handle adapter_handle,
-- 
1.5.2



^ permalink raw reply related	[flat|nested] 19+ messages in thread

* Re: [PATCH] IB/ehca: Serialize HCA-related hCalls on POWER5
  2007-12-06 15:07 [PATCH] IB/ehca: Serialize HCA-related hCalls on POWER5 Joachim Fenkes
@ 2007-12-06 15:48 ` Arnd Bergmann
  2007-12-06 18:27   ` Roland Dreier
  0 siblings, 1 reply; 19+ messages in thread
From: Arnd Bergmann @ 2007-12-06 15:48 UTC (permalink / raw)
  To: linuxppc-dev
  Cc: Joachim Fenkes, LKML, OF-General, Roland Dreier, OF-EWG,
	Stefan Roscher, Christoph Raisch, Marcus Eder

On Thursday 06 December 2007, Joachim Fenkes wrote:
>         printk(KERN_INFO "eHCA Infiniband Device Driver "
>                "(Version " HCAD_VERSION ")\n");
>  
> +       /* Autodetect hCall locking -- we can't read the firmware version
> +        * directly, but we know that starting with POWER6, all firmware
> +        * versions are good.
> +        */
> +       if (ehca_lock_hcalls == -1)
> +               ehca_lock_hcalls = !(cur_cpu_spec->cpu_user_features
> +                                    & PPC_FEATURE_ARCH_2_05);
> +
>         ret = ehca_create_comp_pool();
>         if (ret) {
>                 ehca_gen_err("Cannot create comp pool.");

We already talked about this yesterday, but I still feel that checking the
instruction set of the CPU should not be used to determine whether a
specific device driver implementation is used int hypervisor.

At the very least, I think you should change this to read the hypervisor
version number from the device tree, though the ideal solution would be
to have the absence of this bug encoded in the device node for the ehca
device itself.

Regarding the performance problem, have you checked whether converting all
your spin_lock_irqsave to spin_lock/spin_lock_irq improves your performance
on the older machines? Maybe it's already fast enough that way.

	Arnd <><

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH] IB/ehca: Serialize HCA-related hCalls on POWER5
  2007-12-06 15:48 ` Arnd Bergmann
@ 2007-12-06 18:27   ` Roland Dreier
  2007-12-07  9:58     ` Arnd Bergmann
  2007-12-07 16:25     ` [PATCH] IB/ehca: Serialize HCA-related " Joachim Fenkes
  0 siblings, 2 replies; 19+ messages in thread
From: Roland Dreier @ 2007-12-06 18:27 UTC (permalink / raw)
  To: Arnd Bergmann
  Cc: linuxppc-dev, Joachim Fenkes, LKML, OF-General, Roland Dreier,
	OF-EWG, Stefan Roscher, Christoph Raisch, Marcus Eder

 > > +               ehca_lock_hcalls = !(cur_cpu_spec->cpu_user_features
 > > +                                    & PPC_FEATURE_ARCH_2_05);

 > We already talked about this yesterday, but I still feel that checking the
 > instruction set of the CPU should not be used to determine whether a
 > specific device driver implementation is used int hypervisor.

I had the same reaction... is testing cpu_user_features really the
best way to detect this issue?

I'll hold off applying this for a few days so you guys can decide the
best thing to do.  We'll definitely get some fix into 2.6.24 but we
have time to make a good decision.

 > Regarding the performance problem, have you checked whether converting all
 > your spin_lock_irqsave to spin_lock/spin_lock_irq improves your performance
 > on the older machines? Maybe it's already fast enough that way.

It does seem that the only places that the hcall_lock is taken also
use msleep, so they must always be in process context.  So you can
safely just use spin_lock(), right?

 - R.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH] IB/ehca: Serialize HCA-related hCalls on POWER5
  2007-12-06 18:27   ` Roland Dreier
@ 2007-12-07  9:58     ` Arnd Bergmann
  2007-12-09 23:22       ` Roland Dreier
  2007-12-07 16:25     ` [PATCH] IB/ehca: Serialize HCA-related " Joachim Fenkes
  1 sibling, 1 reply; 19+ messages in thread
From: Arnd Bergmann @ 2007-12-07  9:58 UTC (permalink / raw)
  To: linuxppc-dev
  Cc: Roland Dreier, Joachim Fenkes, LKML, OF-EWG, Christoph Raisch,
	Marcus Eder, OF-General, Stefan Roscher

On Thursday 06 December 2007, Roland Dreier wrote:
>  > Regarding the performance problem, have you checked whether converting all
>  > your spin_lock_irqsave to spin_lock/spin_lock_irq improves your performance
>  > on the older machines? Maybe it's already fast enough that way.
> 
> It does seem that the only places that the hcall_lock is taken also
> use msleep, so they must always be in process context.  So you can
> safely just use spin_lock(), right?

I think it needs some more inspection. The msleep in there is only called
for hcalls that return H_IS_LONG_BUSY(). In theory, you can call
ehca_plpar_hcall_norets() from inside an interrupt handler if the
hcall in question never returns long busy.

	Arnd <><

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH] IB/ehca: Serialize HCA-related hCalls on POWER5
  2007-12-06 18:27   ` Roland Dreier
  2007-12-07  9:58     ` Arnd Bergmann
@ 2007-12-07 16:25     ` Joachim Fenkes
  1 sibling, 0 replies; 19+ messages in thread
From: Joachim Fenkes @ 2007-12-07 16:25 UTC (permalink / raw)
  To: Roland Dreier
  Cc: Arnd Bergmann, Christoph Raisch, OF-EWG, OF-General, LKML,
	linuxppc-dev, Marcus Eder, Roland Dreier, Stefan Roscher

Roland Dreier <rdreier@cisco.com> wrote on 06.12.2007 19:27:09:

>  > > +               ehca_lock_hcalls = 
!(cur_cpu_spec->cpu_user_features
>  > > +                                    & PPC_FEATURE_ARCH_2_05);
> 
>  > We already talked about this yesterday, but I still feel that 
checking the
>  > instruction set of the CPU should not be used to determine whether a
>  > specific device driver implementation is used int hypervisor.
> 
> I had the same reaction... is testing cpu_user_features really the
> best way to detect this issue?

I concur it's not nice, but it was the only feasible method we could find 
without adding a "bug fixed" feature flag to the partition<->firmware 
interface. The firmware version reported in the OFDT is not a reliable 
enough source, and even if it were, it would require a lot of string 
parsing and matching against tables.

We're taking this to the firmware architects at the moment, but they're 
not very fond of the idea of reporting the absence of bugs through 
capability flags, as this could quickly lead to the exhaustion of flag 
bits. We'll let the discussion stew for a bit, but if we don't get this 
flag, we'll have to resort to the CPU features.
 
> I'll hold off applying this for a few days so you guys can decide the
> best thing to do.  We'll definitely get some fix into 2.6.24 but we
> have time to make a good decision.

Right.
 
>  > Regarding the performance problem, have you checked whether 
converting all
>  > your spin_lock_irqsave to spin_lock/spin_lock_irq improves your 
performance
>  > on the older machines? Maybe it's already fast enough that way.
> 
> It does seem that the only places that the hcall_lock is taken also
> use msleep, so they must always be in process context.  So you can
> safely just use spin_lock(), right?

As Arnd said, there are hCalls that will never return H_LONG_BUSY_*, such 
as H_QUERY_PORT and chums, so they will never sleep. The surrounding 
functions, though, are not prepared to be called from interrupt context 
(GFP_KERNEL comes to mind), so I agree that a simple spin_lock() will 
suffice. Thanks, Arnd, for pointing this out.

We'll keep you guys posted on the feature flag discussion. Until then, 
have a nice weekend!

Joachim

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH] IB/ehca: Serialize HCA-related hCalls on POWER5
  2007-12-07  9:58     ` Arnd Bergmann
@ 2007-12-09 23:22       ` Roland Dreier
  2007-12-10 17:41         ` Joachim Fenkes
  0 siblings, 1 reply; 19+ messages in thread
From: Roland Dreier @ 2007-12-09 23:22 UTC (permalink / raw)
  To: Arnd Bergmann
  Cc: linuxppc-dev, Joachim Fenkes, LKML, OF-EWG, Christoph Raisch,
	Marcus Eder, OF-General, Stefan Roscher

 > I think it needs some more inspection. The msleep in there is only called
 > for hcalls that return H_IS_LONG_BUSY(). In theory, you can call
 > ehca_plpar_hcall_norets() from inside an interrupt handler if the
 > hcall in question never returns long busy.

Fair enough... according to Documentation/infiniband/core_locking.txt,
the only driver methods that cannot sleep are:

    create_ah
    modify_ah
    query_ah
    destroy_ah
    bind_mw
    post_send
    post_recv
    poll_cq
    req_notify_cq
    map_phys_fmr

and I don't think ehca does an hcall from any of those.  Of course
there might be other driver-internal code paths that I don't know
about.  Maybe do a quick audit and then stick might_sleep() in the
hcall functions to catch any mistakes?

 - R.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH] IB/ehca: Serialize HCA-related hCalls on POWER5
  2007-12-09 23:22       ` Roland Dreier
@ 2007-12-10 17:41         ` Joachim Fenkes
  2007-12-10 21:47           ` Roland Dreier
  0 siblings, 1 reply; 19+ messages in thread
From: Joachim Fenkes @ 2007-12-10 17:41 UTC (permalink / raw)
  To: Roland Dreier
  Cc: Arnd Bergmann, linuxppc-dev, LKML, OF-EWG, Christoph Raisch,
	Marcus Eder, OF-General, Stefan Roscher

On Monday 10 December 2007 00:22, Roland Dreier wrote:
> Fair enough... according to Documentation/infiniband/core_locking.txt,
> the only driver methods that cannot sleep are:
> 
>     [...]
>     map_phys_fmr

In fact, we do use hCalls there. Our hardware doesn't actually support FMRs,
so we translate a "map FMR" into a "reallocate PMR", which doesn't work
without hCalls. What's more, the hCalls involved (e.g. H_FREE_RESOURCE)
might well return H_LONG_BUSY, so the whole operation might sleep; no way
around it.

How should we deal with this?

Thanks,
  Joachim


^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH] IB/ehca: Serialize HCA-related hCalls on POWER5
  2007-12-10 17:41         ` Joachim Fenkes
@ 2007-12-10 21:47           ` Roland Dreier
  2007-12-11  8:38             ` Joachim Fenkes
  0 siblings, 1 reply; 19+ messages in thread
From: Roland Dreier @ 2007-12-10 21:47 UTC (permalink / raw)
  To: Joachim Fenkes
  Cc: Arnd Bergmann, linuxppc-dev, LKML, OF-EWG, Christoph Raisch,
	Marcus Eder, OF-General, Stefan Roscher

 > >     map_phys_fmr
 > 
 > In fact, we do use hCalls there. Our hardware doesn't actually support FMRs,
 > so we translate a "map FMR" into a "reallocate PMR", which doesn't work
 > without hCalls. What's more, the hCalls involved (e.g. H_FREE_RESOURCE)
 > might well return H_LONG_BUSY, so the whole operation might sleep; no way
 > around it.

It's a big problem.  If you cannot implement FMRs in such a way that
you can handling having map_phys_fmr being called in a context that
can't sleep, then I think the only option is to remove your FMR
support.  It's an optional device feature, so this should be OK
(although the iSER driver currently seems to depend on a device
supporting FMRs, which is probably going to be a problem with iWARP
support in the future anyway).

The fact that consumers can map FMRs from interrupt context, while
holding locks, etc, is pretty fundamental to the use of FMRs so I
don't see any way around the requirement that map_phys_fmr never
sleep.

 - R.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH] IB/ehca: Serialize HCA-related hCalls on POWER5
  2007-12-10 21:47           ` Roland Dreier
@ 2007-12-11  8:38             ` Joachim Fenkes
  2007-12-12 12:14               ` [ewg] " Or Gerlitz
  0 siblings, 1 reply; 19+ messages in thread
From: Joachim Fenkes @ 2007-12-11  8:38 UTC (permalink / raw)
  To: Roland Dreier
  Cc: Arnd Bergmann, Christoph Raisch, OF-EWG, OF-General, LKML,
	linuxppc-dev, Marcus Eder, Stefan Roscher

Roland Dreier <rdreier@cisco.com> wrote on 10.12.2007 22:47:37:

> It's a big problem.  If you cannot implement FMRs in such a way that
> you can handling having map_phys_fmr being called in a context that
> can't sleep, then I think the only option is to remove your FMR
> support.

That's kind of what I feared you would say =)

> It's an optional device feature, so this should be OK
> (although the iSER driver currently seems to depend on a device
> supporting FMRs, which is probably going to be a problem with iWARP
> support in the future anyway).

I don't feel very well with removing code from the driver that iSER seems 
to depend on. Are there plans to fix this in iSER?

In reality, PHYP rarely ever returns H_LONG_BUSY, and we haven't had any 
problems with iSER in the field yet. I admit that our FMR code is 
dangerous, but I prefer "dangerous but working for the customer" over "not 
working for the customer at all".
 
Maybe we can agree on keeping the status quo until no more ULPs depend on 
FMR, then remove FMR from ehca? If so, we'd also let the _irqsave 
spinlocks around hCalls stay in place.

Regards,
  Joachim

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [ewg] Re: [PATCH] IB/ehca: Serialize HCA-related hCalls on POWER5
  2007-12-11  8:38             ` Joachim Fenkes
@ 2007-12-12 12:14               ` Or Gerlitz
  2007-12-12 16:02                 ` Christoph Raisch
  2007-12-12 19:09                 ` Roland Dreier
  0 siblings, 2 replies; 19+ messages in thread
From: Or Gerlitz @ 2007-12-12 12:14 UTC (permalink / raw)
  To: Joachim Fenkes
  Cc: Roland Dreier, Arnd Bergmann, LKML, linuxppc-dev,
	Christoph Raisch, OF-General, Stefan Roscher

Joachim Fenkes wrote:
> Roland Dreier <rdreier@cisco.com> wrote on 10.12.2007 22:47:37:

>> It's an optional device feature, so this should be OK
>> (although the iSER driver currently seems to depend on a device
>> supporting FMRs, which is probably going to be a problem with iWARP
>> support in the future anyway).

> I don't feel very well with removing code from the driver that iSER seems 
> to depend on. Are there plans to fix this in iSER?

What is the fix you suggest, to add a device query that tells you for 
which verbs the documentation does not apply? or enhance the code of the 
  map_phys_fmr verb within the ehca driver to return error if called 
from non-sleepable context?

Or.


^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [ewg] Re: [PATCH] IB/ehca: Serialize HCA-related hCalls on POWER5
  2007-12-12 12:14               ` [ewg] " Or Gerlitz
@ 2007-12-12 16:02                 ` Christoph Raisch
  2007-12-12 19:09                 ` Roland Dreier
  1 sibling, 0 replies; 19+ messages in thread
From: Christoph Raisch @ 2007-12-12 16:02 UTC (permalink / raw)
  To: Or Gerlitz
  Cc: Arnd Bergmann, OF-General, Joachim Fenkes, LKML, linuxppc-dev,
	Roland Dreier, Stefan Roscher

Or Gerlitz <ogerlitz@voltaire.com> wrote on 12.12.2007 13:14:25:

> Joachim Fenkes wrote:
> > Roland Dreier <rdreier@cisco.com> wrote on 10.12.2007 22:47:37:
>
> >> It's an optional device feature, so this should be OK
> >> (although the iSER driver currently seems to depend on a device
> >> supporting FMRs, which is probably going to be a problem with iWARP
> >> support in the future anyway).
>
> > I don't feel very well with removing code from the driver that iSER
seems
> > to depend on. Are there plans to fix this in iSER?
>
> What is the fix you suggest, to add a device query that tells you for
> which verbs the documentation does not apply? or enhance the code of the
>   map_phys_fmr verb within the ehca driver to return error if called
> from non-sleepable context?

Roland,
what is your suggestion here?

We could implement both versions Or is proposing, but having both
at the same time sound like overkill.

Christoph R.


^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [ewg] Re: [PATCH] IB/ehca: Serialize HCA-related hCalls on POWER5
  2007-12-12 12:14               ` [ewg] " Or Gerlitz
  2007-12-12 16:02                 ` Christoph Raisch
@ 2007-12-12 19:09                 ` Roland Dreier
  2007-12-13  8:30                   ` Or Gerlitz
  1 sibling, 1 reply; 19+ messages in thread
From: Roland Dreier @ 2007-12-12 19:09 UTC (permalink / raw)
  To: Or Gerlitz
  Cc: Joachim Fenkes, Arnd Bergmann, LKML, linuxppc-dev,
	Christoph Raisch, OF-General, Stefan Roscher

 > What is the fix you suggest, to add a device query that tells you for 
 > which verbs the documentation does not apply? or enhance the code of the 
 >   map_phys_fmr verb within the ehca driver to return error if called 
 > from non-sleepable context?

I think the right fix for iSER would be to make iSER work even for
devices that don't support FMRs.  For example cxgb3 doesn't implement
FMRs so if anyone ever updates iSER to work on iWARP and not just IB,
then this is something that has to be tackled anyway.  Then ehca could
just get rid of the FMR support it has.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [ewg] Re: [PATCH] IB/ehca: Serialize HCA-related hCalls on POWER5
  2007-12-12 19:09                 ` Roland Dreier
@ 2007-12-13  8:30                   ` Or Gerlitz
  2007-12-13 19:22                     ` [ofa-general] " Caitlin Bestler
  0 siblings, 1 reply; 19+ messages in thread
From: Or Gerlitz @ 2007-12-13  8:30 UTC (permalink / raw)
  To: Roland Dreier
  Cc: Joachim Fenkes, Arnd Bergmann, LKML, linuxppc-dev,
	Christoph Raisch, OF-General, Stefan Roscher

Roland Dreier wrote:
> I think the right fix for iSER would be to make iSER work even for
> devices that don't support FMRs.  For example cxgb3 doesn't implement
> FMRs so if anyone ever updates iSER to work on iWARP and not just IB,
> then this is something that has to be tackled anyway.  Then ehca could
> just get rid of the FMR support it has.

OK, The iSER design took into account the case of many initiators 
running on strong/modern machines talking to possibly lightweight 
embedded target for which the processing cost per I/O at the target side 
should be minimized, that is at most --one-- RDMA operation should be 
issued by the target to serve an I/O request.

For that end, iSER works with one descriptor (called stag in iWARP and 
rkey in IB) per I/O direction sent from the initiator to the target and 
hence can't work without some sort of FMR implementation.

The current implementation of the open iscsi initiator makes sure to 
issue commands in thread (sleepable) context, see iscsi_xmitworker and 
references to it in drivers/scsi/libiscsi.c , so this keeps ehca users 
safe for the time being.

Or.




^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [ofa-general] Re: [ewg] Re: [PATCH] IB/ehca: Serialize HCA-related hCalls on POWER5
  2007-12-13  8:30                   ` Or Gerlitz
@ 2007-12-13 19:22                     ` Caitlin Bestler
  2007-12-13 20:59                       ` Joachim Fenkes
  0 siblings, 1 reply; 19+ messages in thread
From: Caitlin Bestler @ 2007-12-13 19:22 UTC (permalink / raw)
  To: Or Gerlitz
  Cc: Roland Dreier, Arnd Bergmann, Joachim Fenkes, LKML, linuxppc-dev,
	OF-General, Stefan Roscher

On Dec 13, 2007 12:30 AM, Or Gerlitz <ogerlitz@voltaire.com> wrote:
> Roland Dreier wrote:
> > I think the right fix for iSER would be to make iSER work even for
> > devices that don't support FMRs.  For example cxgb3 doesn't implement
> > FMRs so if anyone ever updates iSER to work on iWARP and not just IB,
> > then this is something that has to be tackled anyway.  Then ehca could
> > just get rid of the FMR support it has.
>
> OK, The iSER design took into account the case of many initiators
> running on strong/modern machines talking to possibly lightweight
> embedded target for which the processing cost per I/O at the target side
> should be minimized, that is at most --one-- RDMA operation should be
> issued by the target to serve an I/O request.
>
> For that end, iSER works with one descriptor (called stag in iWARP and
> rkey in IB) per I/O direction sent from the initiator to the target and
> hence can't work without some sort of FMR implementation.
>
> The current implementation of the open iscsi initiator makes sure to
> issue commands in thread (sleepable) context, see iscsi_xmitworker and
> references to it in drivers/scsi/libiscsi.c , so this keeps ehca users
> safe for the time being.
>
> Or.
>

I agree, *some* form of FMR support is important for iSER (and probably
for NFS over RDMA as well). Rather than adding a crippled NO FMR
mode it would make more sense to add support for FMR Work Requests.
I'm not certain what, if any, impact that would have on the Power5 problem,
but that's certainly a cleaner path for iWARP.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [ofa-general] Re: [ewg] Re: [PATCH] IB/ehca: Serialize HCA-related hCalls on POWER5
  2007-12-13 19:22                     ` [ofa-general] " Caitlin Bestler
@ 2007-12-13 20:59                       ` Joachim Fenkes
  2007-12-13 21:08                         ` Caitlin Bestler
  0 siblings, 1 reply; 19+ messages in thread
From: Joachim Fenkes @ 2007-12-13 20:59 UTC (permalink / raw)
  To: Caitlin Bestler
  Cc: Arnd Bergmann, caitlin.bestler, OF-General, LKML, linuxppc-dev,
	Or Gerlitz, Roland Dreier, Stefan Roscher

caitlin.bestler@gmail.com wrote on 13.12.2007 20:22:49:

> On Dec 13, 2007 12:30 AM, Or Gerlitz <ogerlitz@voltaire.com> wrote:
> > The current implementation of the open iscsi initiator makes sure to
> > issue commands in thread (sleepable) context, see iscsi_xmitworker and
> > references to it in drivers/scsi/libiscsi.c , so this keeps ehca users
> > safe for the time being.

> I agree, *some* form of FMR support is important for iSER (and probably
> for NFS over RDMA as well). Rather than adding a crippled NO FMR
> mode it would make more sense to add support for FMR Work Requests.
> I'm not certain what, if any, impact that would have on the Power5 
problem,
> but that's certainly a cleaner path for iWARP.

Well, FMR WRs wouldn't change the eHCA issue -- the driver would have to 
make an hCall in any case, and the architecture says that the hCalls used 
in this scenario might return H_LONG_BUSY, causing the driver to sleep. No 
way around that. Because of this, eHCA's FMRs are actually standard MRs 
with a different API.

If, as Or said, the iSCSI initiator issues commands in sleepable context 
anyway, nothing would be lost by using standard MRs as a fallback solution 
if FMRs aren't available, would it?

J.


^ permalink raw reply	[flat|nested] 19+ messages in thread

* RE: [ofa-general] Re: [ewg] Re: [PATCH] IB/ehca: Serialize HCA-related hCalls on POWER5
  2007-12-13 20:59                       ` Joachim Fenkes
@ 2007-12-13 21:08                         ` Caitlin Bestler
  2007-12-13 21:35                           ` Joachim Fenkes
  2007-12-13 21:48                           ` [ofa-general] Re: [ewg] Re: [PATCH] IB/ehca: SerializeHCA-related " Sean Hefty
  0 siblings, 2 replies; 19+ messages in thread
From: Caitlin Bestler @ 2007-12-13 21:08 UTC (permalink / raw)
  To: Joachim Fenkes
  Cc: Arnd Bergmann, OF-General, LKML, linuxppc-dev, Or Gerlitz,
	Roland Dreier, Stefan Roscher



> -----Original Message-----
> From: Joachim Fenkes [mailto:FENKES@de.ibm.com]
> Sent: Thursday, December 13, 2007 1:00 PM
> To: Caitlin Bestler
> Cc: Arnd Bergmann; caitlin.bestler@gmail.com; OF-General; LKML;
> linuxppc-dev@ozlabs.org; Or Gerlitz; Roland Dreier; Stefan Roscher
> Subject: Re: [ofa-general] Re: [ewg] Re: [PATCH] IB/ehca: Serialize
> HCA-related hCalls on POWER5
> 
> caitlin.bestler@gmail.com wrote on 13.12.2007 20:22:49:
> 
> > On Dec 13, 2007 12:30 AM, Or Gerlitz <ogerlitz@voltaire.com> wrote:
> > > The current implementation of the open iscsi initiator makes sure
> to
> > > issue commands in thread (sleepable) context, see iscsi_xmitworker
> and
> > > references to it in drivers/scsi/libiscsi.c , so this keeps ehca
> users
> > > safe for the time being.
> 
> > I agree, *some* form of FMR support is important for iSER (and
> probably
> > for NFS over RDMA as well). Rather than adding a crippled NO FMR
> > mode it would make more sense to add support for FMR Work Requests.
> > I'm not certain what, if any, impact that would have on the Power5
> problem,
> > but that's certainly a cleaner path for iWARP.
> 
> Well, FMR WRs wouldn't change the eHCA issue -- the driver would have
> to
> make an hCall in any case, and the architecture says that the hCalls
> used
> in this scenario might return H_LONG_BUSY, causing the driver to
sleep.
> No
> way around that. Because of this, eHCA's FMRs are actually standard
MRs
> with a different API.
> 
> If, as Or said, the iSCSI initiator issues commands in sleepable
> context
> anyway, nothing would be lost by using standard MRs as a fallback
> solution
> if FMRs aren't available, would it?
> 

To clarify, an FMR Work Request is simply posted to the SendQ like
any other Work Request (of course the QP has to be privileged, or
it will complete in error). An SQ Post should never block.

But yes, if the current iSCSI initiator always does all call-based
FMRs in a sleepable context then I would agree then any changes can
wait for the first vendor that wants to support FMR Work Requests.

FMR Work Requests can be pipelined, so anyone with hardware that
supported them would have strong motivation to enable the open
iSCSI initiator to take advantage of this.


^ permalink raw reply	[flat|nested] 19+ messages in thread

* RE: [ofa-general] Re: [ewg] Re: [PATCH] IB/ehca: Serialize HCA-related hCalls on POWER5
  2007-12-13 21:08                         ` Caitlin Bestler
@ 2007-12-13 21:35                           ` Joachim Fenkes
  2007-12-13 21:48                           ` [ofa-general] Re: [ewg] Re: [PATCH] IB/ehca: SerializeHCA-related " Sean Hefty
  1 sibling, 0 replies; 19+ messages in thread
From: Joachim Fenkes @ 2007-12-13 21:35 UTC (permalink / raw)
  To: Caitlin Bestler
  Cc: Arnd Bergmann, OF-General, LKML, linuxppc-dev, Or Gerlitz,
	Roland Dreier, Stefan Roscher

"Caitlin Bestler" <Caitlin.Bestler@neterion.com> wrote on 13.12.2007 
22:08:34:

> To clarify, an FMR Work Request is simply posted to the SendQ like
> any other Work Request (of course the QP has to be privileged, or
> it will complete in error). An SQ Post should never block.

This would require hardware support, wouldn't it? eHCA2 doesn't have this 
kind of support, so FMR WRs are not an option here.

J.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* RE: [ofa-general] Re: [ewg] Re: [PATCH] IB/ehca: SerializeHCA-related hCalls on POWER5
  2007-12-13 21:08                         ` Caitlin Bestler
  2007-12-13 21:35                           ` Joachim Fenkes
@ 2007-12-13 21:48                           ` Sean Hefty
  1 sibling, 0 replies; 19+ messages in thread
From: Sean Hefty @ 2007-12-13 21:48 UTC (permalink / raw)
  To: 'Caitlin Bestler', Joachim Fenkes
  Cc: Arnd Bergmann, Roland Dreier, LKML, linuxppc-dev, OF-General,
	Stefan Roscher

>To clarify, an FMR Work Request is simply posted to the SendQ like
>any other Work Request (of course the QP has to be privileged, or
>it will complete in error). An SQ Post should never block.

FMR's as defined by the IB spec and that created by Mellanox are not the same.
They, unfortunately, use the same name and acronym only.  Mellanox FMRs use an
API that is more like that of standard MRs. 

- Sean


^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH] IB/ehca: Serialize HCA-related hCalls on POWER5
       [not found] <OF85E31FAA.DADA6039-ONC12573AA.005439C8-C12573AA.005A132E@LocalDomain>
@ 2007-12-10 17:59 ` Joachim Fenkes
  0 siblings, 0 replies; 19+ messages in thread
From: Joachim Fenkes @ 2007-12-10 17:59 UTC (permalink / raw)
  To: Arnd Bergmann, OF-EWG, OF-General, LKML, linuxppc-dev,
	Roland Dreier, Roland Dreier
  Cc: Christoph Raisch, Marcus Eder, Stefan Roscher

Hi, guys,

> We're taking this to the firmware architects at the moment, but they're 
not 
> very fond of the idea of reporting the absence of bugs through 
capability 
> flags, as this could quickly lead to the exhaustion of flag bits. We'll 
let 
> the discussion stew for a bit, but if we don't get this flag, we'll have 
to 
> resort to the CPU features.

The architects have spoken, and we're getting a capability flag for this. 
I'll repost my patch with new autodetection code that doesn't involve 
checking the processor version.
 
> >  > Regarding the performance problem, have you checked whether 
converting all
> >  > your spin_lock_irqsave to spin_lock/spin_lock_irq improves your 
performance
> >  > on the older machines? Maybe it's already fast enough that way.
> > 
> > It does seem that the only places that the hcall_lock is taken also
> > use msleep, so they must always be in process context.  So you can
> > safely just use spin_lock(), right?
>
> As Arnd said, there are hCalls that will never return H_LONG_BUSY_*, 
such as 
> H_QUERY_PORT and chums, so they will never sleep. The surrounding 
functions, 
> though, are not prepared to be called from interrupt context (GFP_KERNEL 
comes
> to mind), so I agree that a simple spin_lock() will suffice. Thanks, 
Arnd, for
> pointing this out.

As I pointed out in my earlier mail, there's still an issue with 
map_phys_fmr possibly sleeping. Let's keep the irqsave for the time being 
and revisit this part once we find a solution to map_phys_fmr.

Regards,
  Joachim

^ permalink raw reply	[flat|nested] 19+ messages in thread

end of thread, other threads:[~2007-12-13 22:00 UTC | newest]

Thread overview: 19+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2007-12-06 15:07 [PATCH] IB/ehca: Serialize HCA-related hCalls on POWER5 Joachim Fenkes
2007-12-06 15:48 ` Arnd Bergmann
2007-12-06 18:27   ` Roland Dreier
2007-12-07  9:58     ` Arnd Bergmann
2007-12-09 23:22       ` Roland Dreier
2007-12-10 17:41         ` Joachim Fenkes
2007-12-10 21:47           ` Roland Dreier
2007-12-11  8:38             ` Joachim Fenkes
2007-12-12 12:14               ` [ewg] " Or Gerlitz
2007-12-12 16:02                 ` Christoph Raisch
2007-12-12 19:09                 ` Roland Dreier
2007-12-13  8:30                   ` Or Gerlitz
2007-12-13 19:22                     ` [ofa-general] " Caitlin Bestler
2007-12-13 20:59                       ` Joachim Fenkes
2007-12-13 21:08                         ` Caitlin Bestler
2007-12-13 21:35                           ` Joachim Fenkes
2007-12-13 21:48                           ` [ofa-general] Re: [ewg] Re: [PATCH] IB/ehca: SerializeHCA-related " Sean Hefty
2007-12-07 16:25     ` [PATCH] IB/ehca: Serialize HCA-related " Joachim Fenkes
     [not found] <OF85E31FAA.DADA6039-ONC12573AA.005439C8-C12573AA.005A132E@LocalDomain>
2007-12-10 17:59 ` Joachim Fenkes

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).