All of lore.kernel.org
 help / color / mirror / Atom feed
* [Adeos-main] [PATCH+HACK] optimise root stalling
@ 2006-09-25  7:35 Jan Kiszka
  2006-09-25 10:44 ` Jan Kiszka
  2006-09-28  9:07 ` Philippe Gerum
  0 siblings, 2 replies; 13+ messages in thread
From: Jan Kiszka @ 2006-09-25  7:35 UTC (permalink / raw)
  To: adeos-main


[-- Attachment #1.1: Type: text/plain, Size: 1105 bytes --]

The first one is a real patch, introducing an optimisation of the
stall/unstall-root path of I-pipe. It removes any caring for the
pipeline head case of the root doamin from ipipe_root_stall (something
that was lacking for test_and_stall_root anyway), and it pushes under UP
the IRQ disabling into the less likely __ipipe_sync_pipeline path in
ipipe_unstall_root.

Those two changes together gave me about 5% better Linux performance
under RT-load on a low-end Pentium.

A further idea of mine was to inline light-weight parts of the
__ipipe_*_root API on UP (no SMP at hand to judge on it as well). That's
what the second patch, err, hack is for. It gives another 5% Linux
speedup on x86, but it unfortunately contains some tricky dependency
issues. Of course, it also slightly increases the code size again.

Looking at other linux/ipipe.h duplications, e.g. in linux/preempt.h, it
might be worth considering to derive a basic header that has no further
dependencies and can be included anywhere. Based on such header the
proposed optimisation could be built up more cleanly.

Jan

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1.2: optimise-root-stalling.patch --]
[-- Type: text/x-patch; name="optimise-root-stalling.patch", Size: 1478 bytes --]

---
 kernel/ipipe/core.c |   20 +++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

Index: linux-2.6.17.13/kernel/ipipe/core.c
===================================================================
--- linux-2.6.17.13.orig/kernel/ipipe/core.c
+++ linux-2.6.17.13/kernel/ipipe/core.c
@@ -134,16 +134,8 @@ void __ipipe_stall_root(void)
 	unsigned long flags;
 
 	ipipe_get_cpu(flags); /* Care for migration. */
-
 	set_bit(IPIPE_STALL_FLAG, &ipipe_root_domain->cpudata[cpuid].status);
-
-#ifdef CONFIG_SMP
-	if (!__ipipe_pipeline_head_p(ipipe_root_domain))
-		ipipe_put_cpu(flags);
-#else /* CONFIG_SMP */
-	if (__ipipe_pipeline_head_p(ipipe_root_domain))
-		local_irq_disable_hw();
-#endif /* CONFIG_SMP */
+	ipipe_put_cpu(flags);
 }
 
 void __ipipe_cleanup_domain(struct ipipe_domain *ipd)
@@ -166,6 +158,7 @@ void __ipipe_unstall_root(void)
 {
 	ipipe_declare_cpuid;
 
+#ifdef CONFIG_SMP
 	local_irq_disable_hw();
 
 	ipipe_load_cpuid();
@@ -176,6 +169,15 @@ void __ipipe_unstall_root(void)
 		__ipipe_sync_pipeline(IPIPE_IRQMASK_ANY);
 
 	local_irq_enable_hw();
+#else	/* !CONFIG_SMP */
+	__clear_bit(IPIPE_STALL_FLAG, &ipipe_root_domain->cpudata[cpuid].status);
+
+	if (unlikely(ipipe_root_domain->cpudata[cpuid].irq_pending_hi != 0)) {
+		local_irq_disable_hw();
+		__ipipe_sync_pipeline(IPIPE_IRQMASK_ANY);
+		local_irq_enable_hw();
+	}
+#endif	/* CONFIG_SMP */
 }
 
 unsigned long __ipipe_test_root(void)

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1.3: optimise-root-fastpath-v2.patch --]
[-- Type: text/x-patch; name="optimise-root-fastpath-v2.patch", Size: 6650 bytes --]

---
 arch/i386/kernel/entry.S  |    2 -
 include/asm-i386/system.h |   51 +++++++++++++++++++++++++++++++++++++++++++---
 include/linux/ipipe.h     |   34 +++++++++++++++++++++++-------
 kernel/ipipe/core.c       |   26 +++++++++++++----------
 4 files changed, 90 insertions(+), 23 deletions(-)

Index: linux-2.6.17.13/include/linux/ipipe.h
===================================================================
--- linux-2.6.17.13.orig/include/linux/ipipe.h
+++ linux-2.6.17.13/include/linux/ipipe.h
@@ -356,18 +356,12 @@ void __ipipe_remove_domain_proc(struct i
 
 void __ipipe_flush_printk(unsigned irq, void *cookie);
 
-void __ipipe_stall_root(void);
-
-void __ipipe_unstall_root(void);
-
-unsigned long __ipipe_test_root(void);
-
-unsigned long __ipipe_test_and_stall_root(void);
-
 void fastcall __ipipe_walk_pipeline(struct list_head *pos, int cpuid);
 
 void fastcall __ipipe_restore_root(unsigned long x);
 
+void __ipipe_unstall_root(void);
+
 int fastcall __ipipe_schedule_irq(unsigned irq, struct list_head *head);
 
 int fastcall __ipipe_dispatch_event(unsigned event, void *data);
@@ -403,6 +397,30 @@ cpumask_t __ipipe_set_irq_affinity(unsig
 int fastcall __ipipe_send_ipi(unsigned ipi,
 			      cpumask_t cpumask);
 
+void __ipipe_stall_root(void);
+
+unsigned long __ipipe_test_root(void);
+
+unsigned long __ipipe_test_and_stall_root(void);
+
+#else /* !CONFIG_SMP */
+
+static inline void __ipipe_stall_root(void)
+{
+	set_bit(IPIPE_STALL_FLAG, &ipipe_root_domain->cpudata[0].status);
+}
+
+static inline unsigned long __ipipe_test_root(void)
+{
+	return test_bit(IPIPE_STALL_FLAG, &ipipe_root_domain->cpudata[0].status);
+}
+
+static inline unsigned long __ipipe_test_and_stall_root(void)
+{
+	return test_and_set_bit(IPIPE_STALL_FLAG,
+				&ipipe_root_domain->cpudata[0].status);
+}
+
 #endif /* CONFIG_SMP */
 
 /* Called with hw interrupts off. */
Index: linux-2.6.17.13/include/asm-i386/system.h
===================================================================
--- linux-2.6.17.13.orig/include/asm-i386/system.h
+++ linux-2.6.17.13/include/asm-i386/system.h
@@ -462,6 +462,9 @@ static inline unsigned long long __cmpxc
 
 #include <linux/linkage.h>
 
+void fastcall __ipipe_restore_root(unsigned long flags);
+
+#ifdef CONFIG_SMP
 void __ipipe_stall_root(void);
 
 void __ipipe_unstall_root(void);
@@ -470,16 +473,58 @@ unsigned long __ipipe_test_root(void);
 
 unsigned long __ipipe_test_and_stall_root(void);
 
-void fastcall __ipipe_restore_root(unsigned long flags);
+#elif !defined(__LINUX_IPIPE_H)
+
+#define HACK_IPIPE_STALL_FLAG	0	/* Stalls a pipeline stage -- guaranteed at bit #0 */
+
+struct __HACK_ipipe_domain {
+
+	void *fill1, *fill2;
+
+	struct {
+		unsigned long status;
+	} cpudata[1];
+};
+
+extern struct ipipe_domain ipipe_root;
+
+#define HACK_ipipe_root_domain ((struct __HACK_ipipe_domain *)&ipipe_root)
+
+static inline void __HACK_ipipe_stall_root(void)
+{
+	set_bit(HACK_IPIPE_STALL_FLAG, &HACK_ipipe_root_domain->cpudata[0].status);
+}
+
+static inline unsigned long __HACK_ipipe_test_root(void)
+{
+	return test_bit(HACK_IPIPE_STALL_FLAG, &HACK_ipipe_root_domain->cpudata[0].status);
+}
+
+static inline unsigned long __HACK_ipipe_test_and_stall_root(void)
+{
+	return test_and_set_bit(HACK_IPIPE_STALL_FLAG,
+				&HACK_ipipe_root_domain->cpudata[0].status);
+}
+
+#define local_save_flags(x)	((x) = (!__HACK_ipipe_test_root()) << 9)
+#define local_irq_save(x)	((x) = (!__HACK_ipipe_test_and_stall_root()) << 9)
+#define local_irq_disable()	__HACK_ipipe_stall_root()
+
+#define irqs_disabled()		__HACK_ipipe_test_root()
+
+#else
 
 #define local_save_flags(x)	((x) = (!__ipipe_test_root()) << 9)
 #define local_irq_save(x)	((x) = (!__ipipe_test_and_stall_root()) << 9)
-#define local_irq_restore(x)	__ipipe_restore_root(!(x & 0x200))
 #define local_irq_disable()	__ipipe_stall_root()
-#define local_irq_enable()	__ipipe_unstall_root()
 
 #define irqs_disabled()		__ipipe_test_root()
 
+#endif
+
+#define local_irq_restore(x)	__ipipe_restore_root(!(x & 0x200))
+#define local_irq_enable()	__ipipe_unstall_root()
+
 #define halt()	__asm__ __volatile__("hlt": : :"memory")
 
 #ifdef CONFIG_IPIPE_TRACE_IRQSOFF
Index: linux-2.6.17.13/arch/i386/kernel/entry.S
===================================================================
--- linux-2.6.17.13.orig/arch/i386/kernel/entry.S
+++ linux-2.6.17.13/arch/i386/kernel/entry.S
@@ -77,7 +77,7 @@ NT_MASK		= 0x00004000
 VM_MASK		= 0x00020000
 
 #ifdef CONFIG_IPIPE
-#define CLI                     call __ipipe_stall_root	;  sti
+#define CLI                     btsl $0,ipipe_root+0x8
 #define STI                     call __ipipe_unstall_root
 #define STI_COND_HW             sti
 #define EMULATE_ROOT_IRET(bypass) \
Index: linux-2.6.17.13/kernel/ipipe/core.c
===================================================================
--- linux-2.6.17.13.orig/kernel/ipipe/core.c
+++ linux-2.6.17.13/kernel/ipipe/core.c
@@ -128,16 +128,6 @@ void __ipipe_init_stage(struct ipipe_dom
 #endif	/* CONFIG_SMP */
 }
 
-void __ipipe_stall_root(void)
-{
-	ipipe_declare_cpuid;
-	unsigned long flags;
-
-	ipipe_get_cpu(flags); /* Care for migration. */
-	set_bit(IPIPE_STALL_FLAG, &ipipe_root_domain->cpudata[cpuid].status);
-	ipipe_put_cpu(flags);
-}
-
 void __ipipe_cleanup_domain(struct ipipe_domain *ipd)
 {
 	ipipe_unstall_pipeline_from(ipd);
@@ -180,6 +170,17 @@ void __ipipe_unstall_root(void)
 #endif	/* CONFIG_SMP */
 }
 
+#ifdef CONFIG_SMP
+void __ipipe_stall_root(void)
+{
+	ipipe_declare_cpuid;
+	unsigned long flags;
+
+	ipipe_get_cpu(flags); /* Care for migration. */
+	set_bit(IPIPE_STALL_FLAG, &ipipe_root_domain->cpudata[cpuid].status);
+	ipipe_put_cpu(flags);
+}
+
 unsigned long __ipipe_test_root(void)
 {
 	unsigned long flags, x;
@@ -204,6 +205,7 @@ unsigned long __ipipe_test_and_stall_roo
 
 	return x;
 }
+#endif	/* CONFIG_SMP */
 
 void fastcall __ipipe_restore_root(unsigned long x)
 {
@@ -1047,10 +1049,12 @@ EXPORT_SYMBOL(ipipe_test_and_unstall_pip
 EXPORT_SYMBOL(ipipe_unstall_pipeline_head);
 EXPORT_SYMBOL(__ipipe_restore_pipeline_head);
 EXPORT_SYMBOL(__ipipe_unstall_root);
-EXPORT_SYMBOL(__ipipe_stall_root);
 EXPORT_SYMBOL(__ipipe_restore_root);
+#ifdef CONFIG_CMP
+EXPORT_SYMBOL(__ipipe_stall_root);
 EXPORT_SYMBOL(__ipipe_test_and_stall_root);
 EXPORT_SYMBOL(__ipipe_test_root);
+#endif /* CONFIG_SMP */
 EXPORT_SYMBOL(__ipipe_dispatch_event);
 EXPORT_SYMBOL(__ipipe_dispatch_wired);
 EXPORT_SYMBOL(__ipipe_sync_stage);

[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 249 bytes --]

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [Adeos-main] [PATCH+HACK] optimise root stalling
  2006-09-25  7:35 [Adeos-main] [PATCH+HACK] optimise root stalling Jan Kiszka
@ 2006-09-25 10:44 ` Jan Kiszka
  2006-09-28 15:33   ` Philippe Gerum
  2006-09-28  9:07 ` Philippe Gerum
  1 sibling, 1 reply; 13+ messages in thread
From: Jan Kiszka @ 2006-09-25 10:44 UTC (permalink / raw)
  To: adeos-main

[-- Attachment #1: Type: text/plain, Size: 310 bytes --]

Jan Kiszka wrote:
> ...
> A further idea of mine was to inline light-weight parts of the
> __ipipe_*_root API on UP (no SMP at hand to judge on it as well).

BTW: I haven't looked at any detail yet, but would PER_CPU here and
there (e.g. for ipipe_percpu_domain) buy us something on SMP (2.6)?

Jan


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 250 bytes --]

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [Adeos-main] [PATCH+HACK] optimise root stalling
  2006-09-25  7:35 [Adeos-main] [PATCH+HACK] optimise root stalling Jan Kiszka
  2006-09-25 10:44 ` Jan Kiszka
@ 2006-09-28  9:07 ` Philippe Gerum
  2006-09-28  9:15   ` Jan Kiszka
  1 sibling, 1 reply; 13+ messages in thread
From: Philippe Gerum @ 2006-09-28  9:07 UTC (permalink / raw)
  To: Jan Kiszka; +Cc: adeos-main

On Mon, 2006-09-25 at 09:35 +0200, Jan Kiszka wrote:

This one won't work. We need to forcibly re-enable the hw IRQs upon root
unstall requests, regardless of the fact that interrupts are pending in
the log; some code rely on this. E.g. Adeos/ppc over 2.4 would remain
stuck in the delay calibration routine, but there are other more tricky
places where this would bite too.

> @@ -166,6 +158,7 @@ void __ipipe_unstall_root(void)
>  {
>         ipipe_declare_cpuid;
>  
> +#ifdef CONFIG_SMP
>         local_irq_disable_hw();
>  
>         ipipe_load_cpuid();
> @@ -176,6 +169,15 @@ void __ipipe_unstall_root(void)
>                 __ipipe_sync_pipeline(IPIPE_IRQMASK_ANY);
>  
>         local_irq_enable_hw();
> +#else  /* !CONFIG_SMP */
> +       __clear_bit(IPIPE_STALL_FLAG,
> &ipipe_root_domain->cpudata[cpuid].status);
> +
> +       if
> (unlikely(ipipe_root_domain->cpudata[cpuid].irq_pending_hi != 0)) {
> +               local_irq_disable_hw();
> +               __ipipe_sync_pipeline(IPIPE_IRQMASK_ANY);
> +               local_irq_enable_hw();
> +       }
> +#endif /* CONFIG_SMP */
>  }
>   
-- 
Philippe.




^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [Adeos-main] [PATCH+HACK] optimise root stalling
  2006-09-28  9:07 ` Philippe Gerum
@ 2006-09-28  9:15   ` Jan Kiszka
  2006-09-28  9:16     ` Philippe Gerum
  0 siblings, 1 reply; 13+ messages in thread
From: Jan Kiszka @ 2006-09-28  9:15 UTC (permalink / raw)
  To: rpm; +Cc: adeos-main

[-- Attachment #1: Type: text/plain, Size: 899 bytes --]

Philippe Gerum wrote:
> On Mon, 2006-09-25 at 09:35 +0200, Jan Kiszka wrote:
> 
> This one won't work. We need to forcibly re-enable the hw IRQs upon root
> unstall requests, regardless of the fact that interrupts are pending in
> the log; some code rely on this. E.g. Adeos/ppc over 2.4 would remain
> stuck in the delay calibration routine, but there are other more tricky
> places where this would bite too.

But this would be ok?

#else  /* !CONFIG_SMP */
        __clear_bit(IPIPE_STALL_FLAG,
                    &ipipe_root_domain->cpudata[cpuid].status);

       if (unlikely(ipipe_root_domain->cpudata[cpuid].irq_pending_hi !=
           0)) {
               local_irq_disable_hw();
               __ipipe_sync_pipeline(IPIPE_IRQMASK_ANY);
       }
       local_irq_enable_hw();
#endif /* CONFIG_SMP */

So we can still save one disable IRQ in the fastpath.

Jan


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 249 bytes --]

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [Adeos-main] [PATCH+HACK] optimise root stalling
  2006-09-28  9:15   ` Jan Kiszka
@ 2006-09-28  9:16     ` Philippe Gerum
  2006-09-28  9:23       ` Jan Kiszka
  0 siblings, 1 reply; 13+ messages in thread
From: Philippe Gerum @ 2006-09-28  9:16 UTC (permalink / raw)
  To: Jan Kiszka; +Cc: adeos-main

On Thu, 2006-09-28 at 11:15 +0200, Jan Kiszka wrote:
> Philippe Gerum wrote:
> > On Mon, 2006-09-25 at 09:35 +0200, Jan Kiszka wrote:
> > 
> > This one won't work. We need to forcibly re-enable the hw IRQs upon root
> > unstall requests, regardless of the fact that interrupts are pending in
> > the log; some code rely on this. E.g. Adeos/ppc over 2.4 would remain
> > stuck in the delay calibration routine, but there are other more tricky
> > places where this would bite too.
> 
> But this would be ok?
> 
> #else  /* !CONFIG_SMP */
>         __clear_bit(IPIPE_STALL_FLAG,
>                     &ipipe_root_domain->cpudata[cpuid].status);
> 
>        if (unlikely(ipipe_root_domain->cpudata[cpuid].irq_pending_hi !=
>            0)) {
>                local_irq_disable_hw();
>                __ipipe_sync_pipeline(IPIPE_IRQMASK_ANY);
>        }
>        local_irq_enable_hw();
> #endif /* CONFIG_SMP */
> 
> So we can still save one disable IRQ in the fastpath.
> 

But in such a case, you would have to use clear_bit() to keep atomicity.

> Jan
> 
-- 
Philippe.




^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [Adeos-main] [PATCH+HACK] optimise root stalling
  2006-09-28  9:16     ` Philippe Gerum
@ 2006-09-28  9:23       ` Jan Kiszka
  2006-09-28 10:07         ` Philippe Gerum
  0 siblings, 1 reply; 13+ messages in thread
From: Jan Kiszka @ 2006-09-28  9:23 UTC (permalink / raw)
  To: rpm; +Cc: adeos-main

[-- Attachment #1: Type: text/plain, Size: 1146 bytes --]

Philippe Gerum wrote:
> On Thu, 2006-09-28 at 11:15 +0200, Jan Kiszka wrote:
>> Philippe Gerum wrote:
>>> On Mon, 2006-09-25 at 09:35 +0200, Jan Kiszka wrote:
>>>
>>> This one won't work. We need to forcibly re-enable the hw IRQs upon root
>>> unstall requests, regardless of the fact that interrupts are pending in
>>> the log; some code rely on this. E.g. Adeos/ppc over 2.4 would remain
>>> stuck in the delay calibration routine, but there are other more tricky
>>> places where this would bite too.
>> But this would be ok?
>>
>> #else  /* !CONFIG_SMP */
>>         __clear_bit(IPIPE_STALL_FLAG,
>>                     &ipipe_root_domain->cpudata[cpuid].status);
>>
>>        if (unlikely(ipipe_root_domain->cpudata[cpuid].irq_pending_hi !=
>>            0)) {
>>                local_irq_disable_hw();
>>                __ipipe_sync_pipeline(IPIPE_IRQMASK_ANY);
>>        }
>>        local_irq_enable_hw();
>> #endif /* CONFIG_SMP */
>>
>> So we can still save one disable IRQ in the fastpath.
>>
> 
> But in such a case, you would have to use clear_bit() to keep atomicity.

This is for UP, not SMP.

Jan


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 249 bytes --]

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [Adeos-main] [PATCH+HACK] optimise root stalling
  2006-09-28  9:23       ` Jan Kiszka
@ 2006-09-28 10:07         ` Philippe Gerum
  2006-09-28 10:31           ` Jan Kiszka
  0 siblings, 1 reply; 13+ messages in thread
From: Philippe Gerum @ 2006-09-28 10:07 UTC (permalink / raw)
  To: Jan Kiszka; +Cc: adeos-main

On Thu, 2006-09-28 at 11:23 +0200, Jan Kiszka wrote:
> Philippe Gerum wrote:
> > On Thu, 2006-09-28 at 11:15 +0200, Jan Kiszka wrote:
> >> Philippe Gerum wrote:
> >>> On Mon, 2006-09-25 at 09:35 +0200, Jan Kiszka wrote:
> >>>
> >>> This one won't work. We need to forcibly re-enable the hw IRQs upon root
> >>> unstall requests, regardless of the fact that interrupts are pending in
> >>> the log; some code rely on this. E.g. Adeos/ppc over 2.4 would remain
> >>> stuck in the delay calibration routine, but there are other more tricky
> >>> places where this would bite too.
> >> But this would be ok?
> >>
> >> #else  /* !CONFIG_SMP */
> >>         __clear_bit(IPIPE_STALL_FLAG,
> >>                     &ipipe_root_domain->cpudata[cpuid].status);
> >>
> >>        if (unlikely(ipipe_root_domain->cpudata[cpuid].irq_pending_hi !=
> >>            0)) {
> >>                local_irq_disable_hw();
> >>                __ipipe_sync_pipeline(IPIPE_IRQMASK_ANY);
> >>        }
> >>        local_irq_enable_hw();
> >> #endif /* CONFIG_SMP */
> >>
> >> So we can still save one disable IRQ in the fastpath.
> >>
> > 
> > But in such a case, you would have to use clear_bit() to keep atomicity.
> 
> This is for UP, not SMP.
> 

This is not a UP vs SMP issue, the stall bit is strictly CPU local
anyway. The issue is that some archs do _not_ have intrinsically atomic
bitops; they need to emulate them by masking interrupts around the bit
setting operation. Using __clear_bit() in this context would make this
code preemptible by an interrupt in the middle of the bitop. Disabling
hw IRQs on entry allows us to use non-atomic bitops.

> Jan
> 
> _______________________________________________
> Adeos-main mailing list
> Adeos-main@domain.hid
> https://mail.gna.org/listinfo/adeos-main
-- 
Philippe.




^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [Adeos-main] [PATCH+HACK] optimise root stalling
  2006-09-28 10:07         ` Philippe Gerum
@ 2006-09-28 10:31           ` Jan Kiszka
  2006-09-28 10:46             ` Philippe Gerum
  0 siblings, 1 reply; 13+ messages in thread
From: Jan Kiszka @ 2006-09-28 10:31 UTC (permalink / raw)
  To: rpm; +Cc: adeos-main

[-- Attachment #1: Type: text/plain, Size: 1828 bytes --]

Philippe Gerum wrote:
> On Thu, 2006-09-28 at 11:23 +0200, Jan Kiszka wrote:
>> Philippe Gerum wrote:
>>> On Thu, 2006-09-28 at 11:15 +0200, Jan Kiszka wrote:
>>>> Philippe Gerum wrote:
>>>>> On Mon, 2006-09-25 at 09:35 +0200, Jan Kiszka wrote:
>>>>>
>>>>> This one won't work. We need to forcibly re-enable the hw IRQs upon root
>>>>> unstall requests, regardless of the fact that interrupts are pending in
>>>>> the log; some code rely on this. E.g. Adeos/ppc over 2.4 would remain
>>>>> stuck in the delay calibration routine, but there are other more tricky
>>>>> places where this would bite too.
>>>> But this would be ok?
>>>>
>>>> #else  /* !CONFIG_SMP */
>>>>         __clear_bit(IPIPE_STALL_FLAG,
>>>>                     &ipipe_root_domain->cpudata[cpuid].status);
>>>>
>>>>        if (unlikely(ipipe_root_domain->cpudata[cpuid].irq_pending_hi !=
>>>>            0)) {
>>>>                local_irq_disable_hw();
>>>>                __ipipe_sync_pipeline(IPIPE_IRQMASK_ANY);
>>>>        }
>>>>        local_irq_enable_hw();
>>>> #endif /* CONFIG_SMP */
>>>>
>>>> So we can still save one disable IRQ in the fastpath.
>>>>
>>> But in such a case, you would have to use clear_bit() to keep atomicity.
>> This is for UP, not SMP.
>>
> 
> This is not a UP vs SMP issue, the stall bit is strictly CPU local
> anyway. The issue is that some archs do _not_ have intrinsically atomic
> bitops; they need to emulate them by masking interrupts around the bit
> setting operation. Using __clear_bit() in this context would make this
> code preemptible by an interrupt in the middle of the bitop. Disabling
> hw IRQs on entry allows us to use non-atomic bitops.

Yeah, I meanwhile realised my x86-restricted view as well. :)

Does it still make sense then to optimise like I proposed?


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 250 bytes --]

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [Adeos-main] [PATCH+HACK] optimise root stalling
  2006-09-28 10:31           ` Jan Kiszka
@ 2006-09-28 10:46             ` Philippe Gerum
  2006-09-29  7:48               ` Jan Kiszka
  0 siblings, 1 reply; 13+ messages in thread
From: Philippe Gerum @ 2006-09-28 10:46 UTC (permalink / raw)
  To: Jan Kiszka; +Cc: adeos-main

On Thu, 2006-09-28 at 12:31 +0200, Jan Kiszka wrote:
> Does it still make sense then to optimise like I proposed?
> 

If you did measure a 5% improvement on low-end x86 hw, then I guess so.
Moving __ipipe_unstall_root to the arch-dependent section would do, so
that we could use the most efficient implementation on a case by case
basis.

-- 
Philippe.




^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [Adeos-main] [PATCH+HACK] optimise root stalling
  2006-09-25 10:44 ` Jan Kiszka
@ 2006-09-28 15:33   ` Philippe Gerum
  0 siblings, 0 replies; 13+ messages in thread
From: Philippe Gerum @ 2006-09-28 15:33 UTC (permalink / raw)
  To: Jan Kiszka; +Cc: adeos-main

On Mon, 2006-09-25 at 12:44 +0200, Jan Kiszka wrote:
> Jan Kiszka wrote:
> > ...
> > A further idea of mine was to inline light-weight parts of the
> > __ipipe_*_root API on UP (no SMP at hand to judge on it as well).
> 
> BTW: I haven't looked at any detail yet, but would PER_CPU here and
> there (e.g. for ipipe_percpu_domain) buy us something on SMP (2.6)?
> 

At the very least, we don't need to share any cacheline for
ipipe_percpu_domain[] which is purely a per-CPU variable.

> Jan
> 
> _______________________________________________
> Adeos-main mailing list
> Adeos-main@domain.hid
> https://mail.gna.org/listinfo/adeos-main
-- 
Philippe.




^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [Adeos-main] [PATCH+HACK] optimise root stalling
  2006-09-28 10:46             ` Philippe Gerum
@ 2006-09-29  7:48               ` Jan Kiszka
  0 siblings, 0 replies; 13+ messages in thread
From: Jan Kiszka @ 2006-09-29  7:48 UTC (permalink / raw)
  To: rpm; +Cc: adeos-main

[-- Attachment #1: Type: text/plain, Size: 1810 bytes --]

Philippe Gerum wrote:

> On Thu, 2006-09-28 at 12:31 +0200, Jan Kiszka wrote:
>> Does it still make sense then to optimise like I proposed?
>>
>
> If you did measure a 5% improvement on low-end x86 hw, then I guess so.
> Moving __ipipe_unstall_root to the arch-dependent section would do, so
> that we could use the most efficient implementation on a case by case
> basis.
>

I'm not seeing 5% alone due to this change, it's a piece in the puzzle. I just
benchmarked the fixed version below against the unmodified one. There is an improvement
of the average hackbench runtime, though it's minimal. The wrong variant I posted first
doesn't show noticeable benefit compared to the correct one. I guess the "critical"
part is switching IRQs off when they were on, and not just enabling them when they
weren't disabled.

So I think the one below could be applied for now, but per-arch optimisations may
remain on the to-do list for the future if we discover archs that could do better with
hand-optimised code here. The x86 assembly looks quite good already.


--- linux-2.6.17.13.orig/kernel/ipipe/core.c
+++ linux-2.6.17.13/kernel/ipipe/core.c
@@ -166,6 +158,7 @@ void __ipipe_unstall_root(void)
 {
 	ipipe_declare_cpuid;
 
+#ifdef CONFIG_SMP
 	local_irq_disable_hw();
 
 	ipipe_load_cpuid();
@@ -174,6 +167,14 @@ void __ipipe_unstall_root(void)
 
 	if (ipipe_root_domain->cpudata[cpuid].irq_pending_hi != 0)
 		__ipipe_sync_pipeline(IPIPE_IRQMASK_ANY);
+#else	/* !CONFIG_SMP */
+	clear_bit(IPIPE_STALL_FLAG, &ipipe_root_domain->cpudata[cpuid].status);
+
+	if (unlikely(ipipe_root_domain->cpudata[cpuid].irq_pending_hi != 0)) {
+		local_irq_disable_hw();
+		__ipipe_sync_pipeline(IPIPE_IRQMASK_ANY);
+	}
+#endif	/* CONFIG_SMP */
 
 	local_irq_enable_hw();
 }



[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 249 bytes --]

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [Adeos-main] [PATCH+HACK] optimise root stalling
  2006-09-26 15:07 gilles.chanteperdrix
@ 2006-09-26 15:14 ` Jan Kiszka
  0 siblings, 0 replies; 13+ messages in thread
From: Jan Kiszka @ 2006-09-26 15:14 UTC (permalink / raw)
  To: gilles.chanteperdrix; +Cc: adeos-main

[-- Attachment #1: Type: text/plain, Size: 886 bytes --]

gilles.chanteperdrix wrote:
> ---------- Debut du message initial -----------
> 
> De     : adeos-main-bounces@domain.hid
> A      : adeos-main@gna.org
> Copies : 
> Date   : Mon, 25 Sep 2006 12:44:02 +0200
> Objet  : Re: [Adeos-main] [PATCH+HACK] optimise root stalling
> 
>> Jan Kiszka wrote:
>>> ...
>>> A further idea of mine was to inline light-weight parts of the
>>> __ipipe_*_root API on UP (no SMP at hand to judge on it as
> well).
>> BTW: I haven't looked at any detail yet, but would PER_CPU
> here and
>> there (e.g. for ipipe_percpu_domain) buy us something on SMP
> (2.6)?
> 
> I think to remember that the PER_CPU support uses
> smp_processor_id(), 
> and we know that smp_processor_id() must not be used over Xenomai
> stacks.
> 

Not necessarily: per_cpu(var, cpu) retrieves a variable for the given
cpu without touching smp_processor_id.


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 250 bytes --]

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [Adeos-main] [PATCH+HACK] optimise root stalling
@ 2006-09-26 15:07 gilles.chanteperdrix
  2006-09-26 15:14 ` Jan Kiszka
  0 siblings, 1 reply; 13+ messages in thread
From: gilles.chanteperdrix @ 2006-09-26 15:07 UTC (permalink / raw)
  To: jan.kiszka; +Cc: adeos-main

---------- Debut du message initial -----------

De     : adeos-main-bounces@domain.hid
A      : adeos-main@gna.org
Copies :
Date   : Mon, 25 Sep 2006 12:44:02 +0200
Objet  : Re: [Adeos-main] [PATCH+HACK] optimise root stalling

> Jan Kiszka wrote:
> > ...
> > A further idea of mine was to inline light-weight parts of the
> > __ipipe_*_root API on UP (no SMP at hand to judge on it as
well).
>
> BTW: I haven't looked at any detail yet, but would PER_CPU
here and
> there (e.g. for ipipe_percpu_domain) buy us something on SMP
(2.6)?

I think to remember that the PER_CPU support uses
smp_processor_id(),
and we know that smp_processor_id() must not be used over Xenomai
stacks.

--
                 Gilles Chanteperdrix

Accédez au courrier électronique de La Poste
sur www.laposte.net ou sur 3615 LAPOSTENET (0,34€ TTC /mn)
1 Giga de stockage gratuit – Antispam et antivirus intégrés





^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2006-09-29  7:48 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2006-09-25  7:35 [Adeos-main] [PATCH+HACK] optimise root stalling Jan Kiszka
2006-09-25 10:44 ` Jan Kiszka
2006-09-28 15:33   ` Philippe Gerum
2006-09-28  9:07 ` Philippe Gerum
2006-09-28  9:15   ` Jan Kiszka
2006-09-28  9:16     ` Philippe Gerum
2006-09-28  9:23       ` Jan Kiszka
2006-09-28 10:07         ` Philippe Gerum
2006-09-28 10:31           ` Jan Kiszka
2006-09-28 10:46             ` Philippe Gerum
2006-09-29  7:48               ` Jan Kiszka
2006-09-26 15:07 gilles.chanteperdrix
2006-09-26 15:14 ` Jan Kiszka

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.