* [Adeos-main] [PATCH+HACK] optimise root stalling
@ 2006-09-25 7:35 Jan Kiszka
2006-09-25 10:44 ` Jan Kiszka
2006-09-28 9:07 ` Philippe Gerum
0 siblings, 2 replies; 13+ messages in thread
From: Jan Kiszka @ 2006-09-25 7:35 UTC (permalink / raw)
To: adeos-main
[-- Attachment #1.1: Type: text/plain, Size: 1105 bytes --]
The first one is a real patch, introducing an optimisation of the
stall/unstall-root path of I-pipe. It removes any caring for the
pipeline head case of the root doamin from ipipe_root_stall (something
that was lacking for test_and_stall_root anyway), and it pushes under UP
the IRQ disabling into the less likely __ipipe_sync_pipeline path in
ipipe_unstall_root.
Those two changes together gave me about 5% better Linux performance
under RT-load on a low-end Pentium.
A further idea of mine was to inline light-weight parts of the
__ipipe_*_root API on UP (no SMP at hand to judge on it as well). That's
what the second patch, err, hack is for. It gives another 5% Linux
speedup on x86, but it unfortunately contains some tricky dependency
issues. Of course, it also slightly increases the code size again.
Looking at other linux/ipipe.h duplications, e.g. in linux/preempt.h, it
might be worth considering to derive a basic header that has no further
dependencies and can be included anywhere. Based on such header the
proposed optimisation could be built up more cleanly.
Jan
[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1.2: optimise-root-stalling.patch --]
[-- Type: text/x-patch; name="optimise-root-stalling.patch", Size: 1478 bytes --]
---
kernel/ipipe/core.c | 20 +++++++++++---------
1 file changed, 11 insertions(+), 9 deletions(-)
Index: linux-2.6.17.13/kernel/ipipe/core.c
===================================================================
--- linux-2.6.17.13.orig/kernel/ipipe/core.c
+++ linux-2.6.17.13/kernel/ipipe/core.c
@@ -134,16 +134,8 @@ void __ipipe_stall_root(void)
unsigned long flags;
ipipe_get_cpu(flags); /* Care for migration. */
-
set_bit(IPIPE_STALL_FLAG, &ipipe_root_domain->cpudata[cpuid].status);
-
-#ifdef CONFIG_SMP
- if (!__ipipe_pipeline_head_p(ipipe_root_domain))
- ipipe_put_cpu(flags);
-#else /* CONFIG_SMP */
- if (__ipipe_pipeline_head_p(ipipe_root_domain))
- local_irq_disable_hw();
-#endif /* CONFIG_SMP */
+ ipipe_put_cpu(flags);
}
void __ipipe_cleanup_domain(struct ipipe_domain *ipd)
@@ -166,6 +158,7 @@ void __ipipe_unstall_root(void)
{
ipipe_declare_cpuid;
+#ifdef CONFIG_SMP
local_irq_disable_hw();
ipipe_load_cpuid();
@@ -176,6 +169,15 @@ void __ipipe_unstall_root(void)
__ipipe_sync_pipeline(IPIPE_IRQMASK_ANY);
local_irq_enable_hw();
+#else /* !CONFIG_SMP */
+ __clear_bit(IPIPE_STALL_FLAG, &ipipe_root_domain->cpudata[cpuid].status);
+
+ if (unlikely(ipipe_root_domain->cpudata[cpuid].irq_pending_hi != 0)) {
+ local_irq_disable_hw();
+ __ipipe_sync_pipeline(IPIPE_IRQMASK_ANY);
+ local_irq_enable_hw();
+ }
+#endif /* CONFIG_SMP */
}
unsigned long __ipipe_test_root(void)
[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1.3: optimise-root-fastpath-v2.patch --]
[-- Type: text/x-patch; name="optimise-root-fastpath-v2.patch", Size: 6650 bytes --]
---
arch/i386/kernel/entry.S | 2 -
include/asm-i386/system.h | 51 +++++++++++++++++++++++++++++++++++++++++++---
include/linux/ipipe.h | 34 +++++++++++++++++++++++-------
kernel/ipipe/core.c | 26 +++++++++++++----------
4 files changed, 90 insertions(+), 23 deletions(-)
Index: linux-2.6.17.13/include/linux/ipipe.h
===================================================================
--- linux-2.6.17.13.orig/include/linux/ipipe.h
+++ linux-2.6.17.13/include/linux/ipipe.h
@@ -356,18 +356,12 @@ void __ipipe_remove_domain_proc(struct i
void __ipipe_flush_printk(unsigned irq, void *cookie);
-void __ipipe_stall_root(void);
-
-void __ipipe_unstall_root(void);
-
-unsigned long __ipipe_test_root(void);
-
-unsigned long __ipipe_test_and_stall_root(void);
-
void fastcall __ipipe_walk_pipeline(struct list_head *pos, int cpuid);
void fastcall __ipipe_restore_root(unsigned long x);
+void __ipipe_unstall_root(void);
+
int fastcall __ipipe_schedule_irq(unsigned irq, struct list_head *head);
int fastcall __ipipe_dispatch_event(unsigned event, void *data);
@@ -403,6 +397,30 @@ cpumask_t __ipipe_set_irq_affinity(unsig
int fastcall __ipipe_send_ipi(unsigned ipi,
cpumask_t cpumask);
+void __ipipe_stall_root(void);
+
+unsigned long __ipipe_test_root(void);
+
+unsigned long __ipipe_test_and_stall_root(void);
+
+#else /* !CONFIG_SMP */
+
+static inline void __ipipe_stall_root(void)
+{
+ set_bit(IPIPE_STALL_FLAG, &ipipe_root_domain->cpudata[0].status);
+}
+
+static inline unsigned long __ipipe_test_root(void)
+{
+ return test_bit(IPIPE_STALL_FLAG, &ipipe_root_domain->cpudata[0].status);
+}
+
+static inline unsigned long __ipipe_test_and_stall_root(void)
+{
+ return test_and_set_bit(IPIPE_STALL_FLAG,
+ &ipipe_root_domain->cpudata[0].status);
+}
+
#endif /* CONFIG_SMP */
/* Called with hw interrupts off. */
Index: linux-2.6.17.13/include/asm-i386/system.h
===================================================================
--- linux-2.6.17.13.orig/include/asm-i386/system.h
+++ linux-2.6.17.13/include/asm-i386/system.h
@@ -462,6 +462,9 @@ static inline unsigned long long __cmpxc
#include <linux/linkage.h>
+void fastcall __ipipe_restore_root(unsigned long flags);
+
+#ifdef CONFIG_SMP
void __ipipe_stall_root(void);
void __ipipe_unstall_root(void);
@@ -470,16 +473,58 @@ unsigned long __ipipe_test_root(void);
unsigned long __ipipe_test_and_stall_root(void);
-void fastcall __ipipe_restore_root(unsigned long flags);
+#elif !defined(__LINUX_IPIPE_H)
+
+#define HACK_IPIPE_STALL_FLAG 0 /* Stalls a pipeline stage -- guaranteed at bit #0 */
+
+struct __HACK_ipipe_domain {
+
+ void *fill1, *fill2;
+
+ struct {
+ unsigned long status;
+ } cpudata[1];
+};
+
+extern struct ipipe_domain ipipe_root;
+
+#define HACK_ipipe_root_domain ((struct __HACK_ipipe_domain *)&ipipe_root)
+
+static inline void __HACK_ipipe_stall_root(void)
+{
+ set_bit(HACK_IPIPE_STALL_FLAG, &HACK_ipipe_root_domain->cpudata[0].status);
+}
+
+static inline unsigned long __HACK_ipipe_test_root(void)
+{
+ return test_bit(HACK_IPIPE_STALL_FLAG, &HACK_ipipe_root_domain->cpudata[0].status);
+}
+
+static inline unsigned long __HACK_ipipe_test_and_stall_root(void)
+{
+ return test_and_set_bit(HACK_IPIPE_STALL_FLAG,
+ &HACK_ipipe_root_domain->cpudata[0].status);
+}
+
+#define local_save_flags(x) ((x) = (!__HACK_ipipe_test_root()) << 9)
+#define local_irq_save(x) ((x) = (!__HACK_ipipe_test_and_stall_root()) << 9)
+#define local_irq_disable() __HACK_ipipe_stall_root()
+
+#define irqs_disabled() __HACK_ipipe_test_root()
+
+#else
#define local_save_flags(x) ((x) = (!__ipipe_test_root()) << 9)
#define local_irq_save(x) ((x) = (!__ipipe_test_and_stall_root()) << 9)
-#define local_irq_restore(x) __ipipe_restore_root(!(x & 0x200))
#define local_irq_disable() __ipipe_stall_root()
-#define local_irq_enable() __ipipe_unstall_root()
#define irqs_disabled() __ipipe_test_root()
+#endif
+
+#define local_irq_restore(x) __ipipe_restore_root(!(x & 0x200))
+#define local_irq_enable() __ipipe_unstall_root()
+
#define halt() __asm__ __volatile__("hlt": : :"memory")
#ifdef CONFIG_IPIPE_TRACE_IRQSOFF
Index: linux-2.6.17.13/arch/i386/kernel/entry.S
===================================================================
--- linux-2.6.17.13.orig/arch/i386/kernel/entry.S
+++ linux-2.6.17.13/arch/i386/kernel/entry.S
@@ -77,7 +77,7 @@ NT_MASK = 0x00004000
VM_MASK = 0x00020000
#ifdef CONFIG_IPIPE
-#define CLI call __ipipe_stall_root ; sti
+#define CLI btsl $0,ipipe_root+0x8
#define STI call __ipipe_unstall_root
#define STI_COND_HW sti
#define EMULATE_ROOT_IRET(bypass) \
Index: linux-2.6.17.13/kernel/ipipe/core.c
===================================================================
--- linux-2.6.17.13.orig/kernel/ipipe/core.c
+++ linux-2.6.17.13/kernel/ipipe/core.c
@@ -128,16 +128,6 @@ void __ipipe_init_stage(struct ipipe_dom
#endif /* CONFIG_SMP */
}
-void __ipipe_stall_root(void)
-{
- ipipe_declare_cpuid;
- unsigned long flags;
-
- ipipe_get_cpu(flags); /* Care for migration. */
- set_bit(IPIPE_STALL_FLAG, &ipipe_root_domain->cpudata[cpuid].status);
- ipipe_put_cpu(flags);
-}
-
void __ipipe_cleanup_domain(struct ipipe_domain *ipd)
{
ipipe_unstall_pipeline_from(ipd);
@@ -180,6 +170,17 @@ void __ipipe_unstall_root(void)
#endif /* CONFIG_SMP */
}
+#ifdef CONFIG_SMP
+void __ipipe_stall_root(void)
+{
+ ipipe_declare_cpuid;
+ unsigned long flags;
+
+ ipipe_get_cpu(flags); /* Care for migration. */
+ set_bit(IPIPE_STALL_FLAG, &ipipe_root_domain->cpudata[cpuid].status);
+ ipipe_put_cpu(flags);
+}
+
unsigned long __ipipe_test_root(void)
{
unsigned long flags, x;
@@ -204,6 +205,7 @@ unsigned long __ipipe_test_and_stall_roo
return x;
}
+#endif /* CONFIG_SMP */
void fastcall __ipipe_restore_root(unsigned long x)
{
@@ -1047,10 +1049,12 @@ EXPORT_SYMBOL(ipipe_test_and_unstall_pip
EXPORT_SYMBOL(ipipe_unstall_pipeline_head);
EXPORT_SYMBOL(__ipipe_restore_pipeline_head);
EXPORT_SYMBOL(__ipipe_unstall_root);
-EXPORT_SYMBOL(__ipipe_stall_root);
EXPORT_SYMBOL(__ipipe_restore_root);
+#ifdef CONFIG_CMP
+EXPORT_SYMBOL(__ipipe_stall_root);
EXPORT_SYMBOL(__ipipe_test_and_stall_root);
EXPORT_SYMBOL(__ipipe_test_root);
+#endif /* CONFIG_SMP */
EXPORT_SYMBOL(__ipipe_dispatch_event);
EXPORT_SYMBOL(__ipipe_dispatch_wired);
EXPORT_SYMBOL(__ipipe_sync_stage);
[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 249 bytes --]
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [Adeos-main] [PATCH+HACK] optimise root stalling
2006-09-25 7:35 [Adeos-main] [PATCH+HACK] optimise root stalling Jan Kiszka
@ 2006-09-25 10:44 ` Jan Kiszka
2006-09-28 15:33 ` Philippe Gerum
2006-09-28 9:07 ` Philippe Gerum
1 sibling, 1 reply; 13+ messages in thread
From: Jan Kiszka @ 2006-09-25 10:44 UTC (permalink / raw)
To: adeos-main
[-- Attachment #1: Type: text/plain, Size: 310 bytes --]
Jan Kiszka wrote:
> ...
> A further idea of mine was to inline light-weight parts of the
> __ipipe_*_root API on UP (no SMP at hand to judge on it as well).
BTW: I haven't looked at any detail yet, but would PER_CPU here and
there (e.g. for ipipe_percpu_domain) buy us something on SMP (2.6)?
Jan
[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 250 bytes --]
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [Adeos-main] [PATCH+HACK] optimise root stalling
2006-09-25 7:35 [Adeos-main] [PATCH+HACK] optimise root stalling Jan Kiszka
2006-09-25 10:44 ` Jan Kiszka
@ 2006-09-28 9:07 ` Philippe Gerum
2006-09-28 9:15 ` Jan Kiszka
1 sibling, 1 reply; 13+ messages in thread
From: Philippe Gerum @ 2006-09-28 9:07 UTC (permalink / raw)
To: Jan Kiszka; +Cc: adeos-main
On Mon, 2006-09-25 at 09:35 +0200, Jan Kiszka wrote:
This one won't work. We need to forcibly re-enable the hw IRQs upon root
unstall requests, regardless of the fact that interrupts are pending in
the log; some code rely on this. E.g. Adeos/ppc over 2.4 would remain
stuck in the delay calibration routine, but there are other more tricky
places where this would bite too.
> @@ -166,6 +158,7 @@ void __ipipe_unstall_root(void)
> {
> ipipe_declare_cpuid;
>
> +#ifdef CONFIG_SMP
> local_irq_disable_hw();
>
> ipipe_load_cpuid();
> @@ -176,6 +169,15 @@ void __ipipe_unstall_root(void)
> __ipipe_sync_pipeline(IPIPE_IRQMASK_ANY);
>
> local_irq_enable_hw();
> +#else /* !CONFIG_SMP */
> + __clear_bit(IPIPE_STALL_FLAG,
> &ipipe_root_domain->cpudata[cpuid].status);
> +
> + if
> (unlikely(ipipe_root_domain->cpudata[cpuid].irq_pending_hi != 0)) {
> + local_irq_disable_hw();
> + __ipipe_sync_pipeline(IPIPE_IRQMASK_ANY);
> + local_irq_enable_hw();
> + }
> +#endif /* CONFIG_SMP */
> }
>
--
Philippe.
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [Adeos-main] [PATCH+HACK] optimise root stalling
2006-09-28 9:07 ` Philippe Gerum
@ 2006-09-28 9:15 ` Jan Kiszka
2006-09-28 9:16 ` Philippe Gerum
0 siblings, 1 reply; 13+ messages in thread
From: Jan Kiszka @ 2006-09-28 9:15 UTC (permalink / raw)
To: rpm; +Cc: adeos-main
[-- Attachment #1: Type: text/plain, Size: 899 bytes --]
Philippe Gerum wrote:
> On Mon, 2006-09-25 at 09:35 +0200, Jan Kiszka wrote:
>
> This one won't work. We need to forcibly re-enable the hw IRQs upon root
> unstall requests, regardless of the fact that interrupts are pending in
> the log; some code rely on this. E.g. Adeos/ppc over 2.4 would remain
> stuck in the delay calibration routine, but there are other more tricky
> places where this would bite too.
But this would be ok?
#else /* !CONFIG_SMP */
__clear_bit(IPIPE_STALL_FLAG,
&ipipe_root_domain->cpudata[cpuid].status);
if (unlikely(ipipe_root_domain->cpudata[cpuid].irq_pending_hi !=
0)) {
local_irq_disable_hw();
__ipipe_sync_pipeline(IPIPE_IRQMASK_ANY);
}
local_irq_enable_hw();
#endif /* CONFIG_SMP */
So we can still save one disable IRQ in the fastpath.
Jan
[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 249 bytes --]
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [Adeos-main] [PATCH+HACK] optimise root stalling
2006-09-28 9:15 ` Jan Kiszka
@ 2006-09-28 9:16 ` Philippe Gerum
2006-09-28 9:23 ` Jan Kiszka
0 siblings, 1 reply; 13+ messages in thread
From: Philippe Gerum @ 2006-09-28 9:16 UTC (permalink / raw)
To: Jan Kiszka; +Cc: adeos-main
On Thu, 2006-09-28 at 11:15 +0200, Jan Kiszka wrote:
> Philippe Gerum wrote:
> > On Mon, 2006-09-25 at 09:35 +0200, Jan Kiszka wrote:
> >
> > This one won't work. We need to forcibly re-enable the hw IRQs upon root
> > unstall requests, regardless of the fact that interrupts are pending in
> > the log; some code rely on this. E.g. Adeos/ppc over 2.4 would remain
> > stuck in the delay calibration routine, but there are other more tricky
> > places where this would bite too.
>
> But this would be ok?
>
> #else /* !CONFIG_SMP */
> __clear_bit(IPIPE_STALL_FLAG,
> &ipipe_root_domain->cpudata[cpuid].status);
>
> if (unlikely(ipipe_root_domain->cpudata[cpuid].irq_pending_hi !=
> 0)) {
> local_irq_disable_hw();
> __ipipe_sync_pipeline(IPIPE_IRQMASK_ANY);
> }
> local_irq_enable_hw();
> #endif /* CONFIG_SMP */
>
> So we can still save one disable IRQ in the fastpath.
>
But in such a case, you would have to use clear_bit() to keep atomicity.
> Jan
>
--
Philippe.
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [Adeos-main] [PATCH+HACK] optimise root stalling
2006-09-28 9:16 ` Philippe Gerum
@ 2006-09-28 9:23 ` Jan Kiszka
2006-09-28 10:07 ` Philippe Gerum
0 siblings, 1 reply; 13+ messages in thread
From: Jan Kiszka @ 2006-09-28 9:23 UTC (permalink / raw)
To: rpm; +Cc: adeos-main
[-- Attachment #1: Type: text/plain, Size: 1146 bytes --]
Philippe Gerum wrote:
> On Thu, 2006-09-28 at 11:15 +0200, Jan Kiszka wrote:
>> Philippe Gerum wrote:
>>> On Mon, 2006-09-25 at 09:35 +0200, Jan Kiszka wrote:
>>>
>>> This one won't work. We need to forcibly re-enable the hw IRQs upon root
>>> unstall requests, regardless of the fact that interrupts are pending in
>>> the log; some code rely on this. E.g. Adeos/ppc over 2.4 would remain
>>> stuck in the delay calibration routine, but there are other more tricky
>>> places where this would bite too.
>> But this would be ok?
>>
>> #else /* !CONFIG_SMP */
>> __clear_bit(IPIPE_STALL_FLAG,
>> &ipipe_root_domain->cpudata[cpuid].status);
>>
>> if (unlikely(ipipe_root_domain->cpudata[cpuid].irq_pending_hi !=
>> 0)) {
>> local_irq_disable_hw();
>> __ipipe_sync_pipeline(IPIPE_IRQMASK_ANY);
>> }
>> local_irq_enable_hw();
>> #endif /* CONFIG_SMP */
>>
>> So we can still save one disable IRQ in the fastpath.
>>
>
> But in such a case, you would have to use clear_bit() to keep atomicity.
This is for UP, not SMP.
Jan
[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 249 bytes --]
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [Adeos-main] [PATCH+HACK] optimise root stalling
2006-09-28 9:23 ` Jan Kiszka
@ 2006-09-28 10:07 ` Philippe Gerum
2006-09-28 10:31 ` Jan Kiszka
0 siblings, 1 reply; 13+ messages in thread
From: Philippe Gerum @ 2006-09-28 10:07 UTC (permalink / raw)
To: Jan Kiszka; +Cc: adeos-main
On Thu, 2006-09-28 at 11:23 +0200, Jan Kiszka wrote:
> Philippe Gerum wrote:
> > On Thu, 2006-09-28 at 11:15 +0200, Jan Kiszka wrote:
> >> Philippe Gerum wrote:
> >>> On Mon, 2006-09-25 at 09:35 +0200, Jan Kiszka wrote:
> >>>
> >>> This one won't work. We need to forcibly re-enable the hw IRQs upon root
> >>> unstall requests, regardless of the fact that interrupts are pending in
> >>> the log; some code rely on this. E.g. Adeos/ppc over 2.4 would remain
> >>> stuck in the delay calibration routine, but there are other more tricky
> >>> places where this would bite too.
> >> But this would be ok?
> >>
> >> #else /* !CONFIG_SMP */
> >> __clear_bit(IPIPE_STALL_FLAG,
> >> &ipipe_root_domain->cpudata[cpuid].status);
> >>
> >> if (unlikely(ipipe_root_domain->cpudata[cpuid].irq_pending_hi !=
> >> 0)) {
> >> local_irq_disable_hw();
> >> __ipipe_sync_pipeline(IPIPE_IRQMASK_ANY);
> >> }
> >> local_irq_enable_hw();
> >> #endif /* CONFIG_SMP */
> >>
> >> So we can still save one disable IRQ in the fastpath.
> >>
> >
> > But in such a case, you would have to use clear_bit() to keep atomicity.
>
> This is for UP, not SMP.
>
This is not a UP vs SMP issue, the stall bit is strictly CPU local
anyway. The issue is that some archs do _not_ have intrinsically atomic
bitops; they need to emulate them by masking interrupts around the bit
setting operation. Using __clear_bit() in this context would make this
code preemptible by an interrupt in the middle of the bitop. Disabling
hw IRQs on entry allows us to use non-atomic bitops.
> Jan
>
> _______________________________________________
> Adeos-main mailing list
> Adeos-main@domain.hid
> https://mail.gna.org/listinfo/adeos-main
--
Philippe.
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [Adeos-main] [PATCH+HACK] optimise root stalling
2006-09-28 10:07 ` Philippe Gerum
@ 2006-09-28 10:31 ` Jan Kiszka
2006-09-28 10:46 ` Philippe Gerum
0 siblings, 1 reply; 13+ messages in thread
From: Jan Kiszka @ 2006-09-28 10:31 UTC (permalink / raw)
To: rpm; +Cc: adeos-main
[-- Attachment #1: Type: text/plain, Size: 1828 bytes --]
Philippe Gerum wrote:
> On Thu, 2006-09-28 at 11:23 +0200, Jan Kiszka wrote:
>> Philippe Gerum wrote:
>>> On Thu, 2006-09-28 at 11:15 +0200, Jan Kiszka wrote:
>>>> Philippe Gerum wrote:
>>>>> On Mon, 2006-09-25 at 09:35 +0200, Jan Kiszka wrote:
>>>>>
>>>>> This one won't work. We need to forcibly re-enable the hw IRQs upon root
>>>>> unstall requests, regardless of the fact that interrupts are pending in
>>>>> the log; some code rely on this. E.g. Adeos/ppc over 2.4 would remain
>>>>> stuck in the delay calibration routine, but there are other more tricky
>>>>> places where this would bite too.
>>>> But this would be ok?
>>>>
>>>> #else /* !CONFIG_SMP */
>>>> __clear_bit(IPIPE_STALL_FLAG,
>>>> &ipipe_root_domain->cpudata[cpuid].status);
>>>>
>>>> if (unlikely(ipipe_root_domain->cpudata[cpuid].irq_pending_hi !=
>>>> 0)) {
>>>> local_irq_disable_hw();
>>>> __ipipe_sync_pipeline(IPIPE_IRQMASK_ANY);
>>>> }
>>>> local_irq_enable_hw();
>>>> #endif /* CONFIG_SMP */
>>>>
>>>> So we can still save one disable IRQ in the fastpath.
>>>>
>>> But in such a case, you would have to use clear_bit() to keep atomicity.
>> This is for UP, not SMP.
>>
>
> This is not a UP vs SMP issue, the stall bit is strictly CPU local
> anyway. The issue is that some archs do _not_ have intrinsically atomic
> bitops; they need to emulate them by masking interrupts around the bit
> setting operation. Using __clear_bit() in this context would make this
> code preemptible by an interrupt in the middle of the bitop. Disabling
> hw IRQs on entry allows us to use non-atomic bitops.
Yeah, I meanwhile realised my x86-restricted view as well. :)
Does it still make sense then to optimise like I proposed?
[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 250 bytes --]
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [Adeos-main] [PATCH+HACK] optimise root stalling
2006-09-28 10:31 ` Jan Kiszka
@ 2006-09-28 10:46 ` Philippe Gerum
2006-09-29 7:48 ` Jan Kiszka
0 siblings, 1 reply; 13+ messages in thread
From: Philippe Gerum @ 2006-09-28 10:46 UTC (permalink / raw)
To: Jan Kiszka; +Cc: adeos-main
On Thu, 2006-09-28 at 12:31 +0200, Jan Kiszka wrote:
> Does it still make sense then to optimise like I proposed?
>
If you did measure a 5% improvement on low-end x86 hw, then I guess so.
Moving __ipipe_unstall_root to the arch-dependent section would do, so
that we could use the most efficient implementation on a case by case
basis.
--
Philippe.
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [Adeos-main] [PATCH+HACK] optimise root stalling
2006-09-25 10:44 ` Jan Kiszka
@ 2006-09-28 15:33 ` Philippe Gerum
0 siblings, 0 replies; 13+ messages in thread
From: Philippe Gerum @ 2006-09-28 15:33 UTC (permalink / raw)
To: Jan Kiszka; +Cc: adeos-main
On Mon, 2006-09-25 at 12:44 +0200, Jan Kiszka wrote:
> Jan Kiszka wrote:
> > ...
> > A further idea of mine was to inline light-weight parts of the
> > __ipipe_*_root API on UP (no SMP at hand to judge on it as well).
>
> BTW: I haven't looked at any detail yet, but would PER_CPU here and
> there (e.g. for ipipe_percpu_domain) buy us something on SMP (2.6)?
>
At the very least, we don't need to share any cacheline for
ipipe_percpu_domain[] which is purely a per-CPU variable.
> Jan
>
> _______________________________________________
> Adeos-main mailing list
> Adeos-main@domain.hid
> https://mail.gna.org/listinfo/adeos-main
--
Philippe.
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [Adeos-main] [PATCH+HACK] optimise root stalling
2006-09-28 10:46 ` Philippe Gerum
@ 2006-09-29 7:48 ` Jan Kiszka
0 siblings, 0 replies; 13+ messages in thread
From: Jan Kiszka @ 2006-09-29 7:48 UTC (permalink / raw)
To: rpm; +Cc: adeos-main
[-- Attachment #1: Type: text/plain, Size: 1810 bytes --]
Philippe Gerum wrote:
> On Thu, 2006-09-28 at 12:31 +0200, Jan Kiszka wrote:
>> Does it still make sense then to optimise like I proposed?
>>
>
> If you did measure a 5% improvement on low-end x86 hw, then I guess so.
> Moving __ipipe_unstall_root to the arch-dependent section would do, so
> that we could use the most efficient implementation on a case by case
> basis.
>
I'm not seeing 5% alone due to this change, it's a piece in the puzzle. I just
benchmarked the fixed version below against the unmodified one. There is an improvement
of the average hackbench runtime, though it's minimal. The wrong variant I posted first
doesn't show noticeable benefit compared to the correct one. I guess the "critical"
part is switching IRQs off when they were on, and not just enabling them when they
weren't disabled.
So I think the one below could be applied for now, but per-arch optimisations may
remain on the to-do list for the future if we discover archs that could do better with
hand-optimised code here. The x86 assembly looks quite good already.
--- linux-2.6.17.13.orig/kernel/ipipe/core.c
+++ linux-2.6.17.13/kernel/ipipe/core.c
@@ -166,6 +158,7 @@ void __ipipe_unstall_root(void)
{
ipipe_declare_cpuid;
+#ifdef CONFIG_SMP
local_irq_disable_hw();
ipipe_load_cpuid();
@@ -174,6 +167,14 @@ void __ipipe_unstall_root(void)
if (ipipe_root_domain->cpudata[cpuid].irq_pending_hi != 0)
__ipipe_sync_pipeline(IPIPE_IRQMASK_ANY);
+#else /* !CONFIG_SMP */
+ clear_bit(IPIPE_STALL_FLAG, &ipipe_root_domain->cpudata[cpuid].status);
+
+ if (unlikely(ipipe_root_domain->cpudata[cpuid].irq_pending_hi != 0)) {
+ local_irq_disable_hw();
+ __ipipe_sync_pipeline(IPIPE_IRQMASK_ANY);
+ }
+#endif /* CONFIG_SMP */
local_irq_enable_hw();
}
[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 249 bytes --]
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [Adeos-main] [PATCH+HACK] optimise root stalling
2006-09-26 15:07 gilles.chanteperdrix
@ 2006-09-26 15:14 ` Jan Kiszka
0 siblings, 0 replies; 13+ messages in thread
From: Jan Kiszka @ 2006-09-26 15:14 UTC (permalink / raw)
To: gilles.chanteperdrix; +Cc: adeos-main
[-- Attachment #1: Type: text/plain, Size: 886 bytes --]
gilles.chanteperdrix wrote:
> ---------- Debut du message initial -----------
>
> De : adeos-main-bounces@domain.hid
> A : adeos-main@gna.org
> Copies :
> Date : Mon, 25 Sep 2006 12:44:02 +0200
> Objet : Re: [Adeos-main] [PATCH+HACK] optimise root stalling
>
>> Jan Kiszka wrote:
>>> ...
>>> A further idea of mine was to inline light-weight parts of the
>>> __ipipe_*_root API on UP (no SMP at hand to judge on it as
> well).
>> BTW: I haven't looked at any detail yet, but would PER_CPU
> here and
>> there (e.g. for ipipe_percpu_domain) buy us something on SMP
> (2.6)?
>
> I think to remember that the PER_CPU support uses
> smp_processor_id(),
> and we know that smp_processor_id() must not be used over Xenomai
> stacks.
>
Not necessarily: per_cpu(var, cpu) retrieves a variable for the given
cpu without touching smp_processor_id.
[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 250 bytes --]
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [Adeos-main] [PATCH+HACK] optimise root stalling
@ 2006-09-26 15:07 gilles.chanteperdrix
2006-09-26 15:14 ` Jan Kiszka
0 siblings, 1 reply; 13+ messages in thread
From: gilles.chanteperdrix @ 2006-09-26 15:07 UTC (permalink / raw)
To: jan.kiszka; +Cc: adeos-main
---------- Debut du message initial -----------
De : adeos-main-bounces@domain.hid
A : adeos-main@gna.org
Copies :
Date : Mon, 25 Sep 2006 12:44:02 +0200
Objet : Re: [Adeos-main] [PATCH+HACK] optimise root stalling
> Jan Kiszka wrote:
> > ...
> > A further idea of mine was to inline light-weight parts of the
> > __ipipe_*_root API on UP (no SMP at hand to judge on it as
well).
>
> BTW: I haven't looked at any detail yet, but would PER_CPU
here and
> there (e.g. for ipipe_percpu_domain) buy us something on SMP
(2.6)?
I think to remember that the PER_CPU support uses
smp_processor_id(),
and we know that smp_processor_id() must not be used over Xenomai
stacks.
--
Gilles Chanteperdrix
Accédez au courrier électronique de La Poste
sur www.laposte.net ou sur 3615 LAPOSTENET (0,34 TTC /mn)
1 Giga de stockage gratuit Antispam et antivirus intégrés
^ permalink raw reply [flat|nested] 13+ messages in thread
end of thread, other threads:[~2006-09-29 7:48 UTC | newest]
Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2006-09-25 7:35 [Adeos-main] [PATCH+HACK] optimise root stalling Jan Kiszka
2006-09-25 10:44 ` Jan Kiszka
2006-09-28 15:33 ` Philippe Gerum
2006-09-28 9:07 ` Philippe Gerum
2006-09-28 9:15 ` Jan Kiszka
2006-09-28 9:16 ` Philippe Gerum
2006-09-28 9:23 ` Jan Kiszka
2006-09-28 10:07 ` Philippe Gerum
2006-09-28 10:31 ` Jan Kiszka
2006-09-28 10:46 ` Philippe Gerum
2006-09-29 7:48 ` Jan Kiszka
2006-09-26 15:07 gilles.chanteperdrix
2006-09-26 15:14 ` Jan Kiszka
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.