All of lore.kernel.org
 help / color / mirror / Atom feed
* [Xenomai] Xenomai 3 Multi-core Semaphore latency
@ 2018-05-18 16:24 Singh, Raman
  2018-05-20 15:07 ` Philippe Gerum
  0 siblings, 1 reply; 7+ messages in thread
From: Singh, Raman @ 2018-05-18 16:24 UTC (permalink / raw)
  To: xenomai

Environment: ARM Cortex-A53 quad-core processor (ARM 64-bit) on a
Zynq Ultrascale+ ZCU102 dev board, Xenomai 3 next branch from May 
14, 2018 (SHA1: 410a4cc1109ba4e0d05b7ece7b4a5210287e1183 ), 
Cobalt configuration with POSIX skin, Linux Kernel version 4.9.24

I've been having issues with semaphore latency when threads access 
semaphores while executing on different cores. When both threads accessing 
a semaphore execute on the same processor core, the latency between
one thread posting a semaphore and another waking up after waiting on it 
is fairly small. However, as soon as one of the threads is moved to a 
different core, the latency between a semaphore post from one thread to a 
waiting thread waking up in response starts to become large enough to 
affect real time performance.  The latencies I've been seeing are on the order
of 100's of milliseconds.

The code below demonstrates the issue. It creates two threads. One thread
calls sem_post() and sleeps for a millisecond for each iteration of a loop 
for 1000 iterations. The other thread performs a blocking wait on the 
semaphore in a loop for the same amount of iterations. Both threads print 
out their operation (either sem_post() or sem_wait()) along with a 
timestamp and loop iteration count. The expectation is that the printouts 
that result should alternate between posts and waits and this is in fact 
what happens when both threads run on the same core. The timestamps show 
that the latency between a post and the second thread waking up is pretty 
low.

When the threads are moved to separate cores, the pattern of the printouts
changes. Instead of alternating posts followed by waits, there are
multiple posts in a row followed by an equal number of waits in a row. The 
timestamps show significant latency between the first call to sem_post() 
in a sequence and the corresponding first sem_wait() call that returns
after blocking.

I've also tried this test code using standard Linux primitives, and in 
both cases, the printouts come out the same with alternating posts and 
waits.  Additionally, this multi-core issue does not occur on an older ZC706 
platform with Xenomai 2.6.x.

With regard to the executable generated from the code below, with no 
arguments provided it will run with both threads on the same core. If
any argument(s) are provided, it will run with the threads on separate
cores.

Any help with this issue would be appreciated.

Thanks, 
Raman

#include <cstdlib>
#include <errno.h>
#include <stdio.h>
#include <pthread.h>
#include <semaphore.h>
#include <string.h>
#include <time.h>
#include <unistd.h>

static sem_t semaphore;

/*
 * Producer: posts the shared semaphore once per millisecond, 1000 times,
 * printing a CLOCK_REALTIME timestamp and the iteration count after each
 * post so post/wake latency can be read off the interleaved output.
 */
void* producer_thread(void* arg)
{
    struct timespec now;

    for (int iteration = 1; iteration <= 1000; ++iteration)
    {
        sem_post(&semaphore);

        if (clock_gettime(CLOCK_REALTIME, &now) == -1)
        {
            printf("Failed to get current time.\n");
            return 0;
        }

        printf("sem_post:%lld.%.9ld, count = %d\n",
               (long long)now.tv_sec, now.tv_nsec, iteration);

        /* 1 ms pacing between posts. */
        usleep(1000);
    }

    return 0;
}

/*
 * Consumer: blocks on the shared semaphore 1000 times, printing a
 * CLOCK_REALTIME timestamp and the iteration count after each wake-up.
 * Comparing these timestamps with the producer's shows the wake latency.
 */
void* consumer_thread(void* arg)
{
    struct timespec now;

    for (int iteration = 1; iteration <= 1000; ++iteration)
    {
        sem_wait(&semaphore);

        if (clock_gettime(CLOCK_REALTIME, &now) == -1)
        {
            printf("Failed to get current time.\n");
            return 0;
        }

        printf("sem_wait:%lld.%.9ld, count = %d\n",
               (long long)now.tv_sec, now.tv_nsec, iteration);
    }

    return 0;
}

/*
 * Apply a SCHED_FIFO priority and a single-CPU affinity mask to *attr.
 * Prints a diagnostic and returns -1 on failure, 0 on success.
 * Factored out of main(), which previously duplicated this sequence
 * verbatim for the consumer and producer threads.
 */
static int configure_attr(pthread_attr_t* attr, int priority, int cpu)
{
    sched_param param;
    if (pthread_attr_getschedparam(attr, &param))
    {
        printf("Unable to get params for setting priority.\n");
        return -1;
    }

    param.sched_priority = priority;
    if (pthread_attr_setschedparam(attr, &param))
    {
        printf("Unable to set thread priority.\n");
        return -1;
    }

    cpu_set_t cpu_set;
    CPU_ZERO(&cpu_set);
    CPU_SET(cpu, &cpu_set);
    if (pthread_attr_setaffinity_np(attr, sizeof(cpu_set_t), &cpu_set))
    {
        printf("Unable to set CPU affinity.\n");
        return -1;
    }

    return 0;
}

/*
 * Latency reproducer entry point.
 *
 * With no arguments, both threads are pinned to CPU 0 (same core).
 * With any argument(s), the consumer is moved to CPU 1 so the
 * cross-core semaphore wake-up path is exercised.
 *
 * Returns 0 on success, -1 on any setup failure.
 */
int main(int argc, char *argv[])
{
    /* Any command-line argument requests separate cores. */
    bool same_core = (argc <= 1);

    if (sem_init(&semaphore, 0, 0) == -1)
    {
        /* Fixed format string: period belongs before the newline. */
        printf("Failed to initialize semaphore. Error: %s.\n", strerror(errno));
        return -1;
    }

    int producer_affinity = 0;
    int consumer_affinity = same_core ? 0 : 1;

    /* Consumer runs below the producer so posts preempt it promptly. */
    int consumer_priority = 86;
    int producer_priority = 99;

    pthread_attr_t attr;
    if (pthread_attr_init(&attr))
    {
        printf("Failed to initialize attributes.\n");
        return -1;
    }

    /* Explicit scheduling: do not inherit main()'s policy/priority. */
    if (pthread_attr_setinheritsched(&attr, PTHREAD_EXPLICIT_SCHED))
    {
        printf("Failed to disable scheduler inheritance.\n");
        return -1;
    }

    if (pthread_attr_setschedpolicy(&attr, SCHED_FIFO))
    {
        printf("Failed to set scheduler to SCHED_FIFO.\n");
        return -1;
    }

    if (configure_attr(&attr, consumer_priority, consumer_affinity))
        return -1;

    pthread_t consumer;
    if (pthread_create(&consumer, &attr, consumer_thread, NULL))
    {
        printf("Unable to create consumer thread.\n");
        return -1;
    }

    /* Reuse the same attr object, retargeted for the producer. */
    if (configure_attr(&attr, producer_priority, producer_affinity))
        return -1;

    pthread_t producer;
    if (pthread_create(&producer, &attr, producer_thread, NULL))
    {
        printf("Unable to create producer thread.\n");
        return -1;
    }

    pthread_attr_destroy(&attr);

    pthread_join(producer, NULL);
    pthread_join(consumer, NULL);

    sem_destroy(&semaphore);

    return 0;
}


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [Xenomai] Xenomai 3 Multi-core Semaphore latency
  2018-05-18 16:24 [Xenomai] Xenomai 3 Multi-core Semaphore latency Singh, Raman
@ 2018-05-20 15:07 ` Philippe Gerum
  2018-05-22  5:06   ` Dmitriy Cherkasov
  0 siblings, 1 reply; 7+ messages in thread
From: Philippe Gerum @ 2018-05-20 15:07 UTC (permalink / raw)
  To: Singh, Raman, xenomai, Dmitriy Cherkasov

On 05/18/2018 06:24 PM, Singh, Raman wrote:
> Environment: ARM Cortex-A53 quad-core processor (ARM 64-bit) on a
> Zynq Ultrascale+ ZCU102 dev board, Xenomai 3 next branch from May 
> 14, 2018 (SHA1: 410a4cc1109ba4e0d05b7ece7b4a5210287e1183 ), 
> Cobalt configuration with POSIX skin, Linux Kernel version 4.9.24
> 
> I've been having issues with semaphore latency when threads access 
> semaphores while executing on different cores. When both threads accessing 
> a semaphore execute on the same processor core, the latency between
> one thread posting a semaphore and another waking up after waiting on it 
> is fairly small. However, as soon as one of the threads is moved to a 
> different core, the latency between a semaphore post from one thread to a 
> waiting thread waking up in response starts to become large enough to 
> affect real time performance.  The latencies I've been seeing are on the order
> of 100's of milliseconds.
> 

Reproduced on hikey here: the rescheduling IPIs Xenomai is sending for
waking up threads on remote CPUs don't flow to the other end properly
(ipipe_send_ipi()), which explains the behavior you have been seeing.

@Dmitriy: this may be an issue with the range of SGIs available to the
kernel when a secure firmware is enabled, which may be restricted to
SGI[0-7].

For the rescheduling IPI on ARM64, the interrupt pipeline attempts to
trigger SGI8 which may be reserved by the ATF in secure mode, therefore
may never be received on the remote end.

Fixing this will require some work in the interrupt pipeline, typically
for multiplexing our IPIs on a single SGI below SGI8. As a matter of
fact, the same issue exists on the ARM side, but since running a secure
firmware there is uncommon for Xenomai users, this went unnoticed (at
least not reported yet AFAIR). We need to sync up on this not to
duplicate work.

-- 
Philippe.


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [Xenomai] Xenomai 3 Multi-core Semaphore latency
  2018-05-20 15:07 ` Philippe Gerum
@ 2018-05-22  5:06   ` Dmitriy Cherkasov
  2018-05-22  7:29     ` Philippe Gerum
  2018-06-12 16:18     ` Jeff Melvile
  0 siblings, 2 replies; 7+ messages in thread
From: Dmitriy Cherkasov @ 2018-05-22  5:06 UTC (permalink / raw)
  To: Philippe Gerum, Singh, Raman, xenomai

On 05/20/2018 08:07 AM, Philippe Gerum wrote:
> On 05/18/2018 06:24 PM, Singh, Raman wrote:
>> Environment: ARM Cortex-A53 quad-core processor (ARM 64-bit) on a
>> Zynq Ultrascale+ ZCU102 dev board, Xenomai 3 next branch from May 
>> 14, 2018 (SHA1: 410a4cc1109ba4e0d05b7ece7b4a5210287e1183 ), 
>> Cobalt configuration with POSIX skin, Linux Kernel version 4.9.24
>>
>> I've been having issues with semaphore latency when threads access 
>> semaphores while executing on different cores. When both threads accessing 
>> a semaphore execute on the same processor core, the latency between
>> one thread posting a semaphore and another waking up after waiting on it 
>> is fairly small. However, as soon as one of the threads is moved to a 
>> different core, the latency between a semaphore post from one thread to a 
>> waiting thread waking up in response starts to become large enough to 
>> affect real time performance.  The latencies I've been seeing are on the order
>> of 100's of milliseconds.
>>
> 
> Reproduced on hikey here: the rescheduling IPIs Xenomai is sending for
> waking up threads on remote CPUs don't flow to the other end properly
> (ipipe_send_ipi()), which explains the behavior you have been seeing.
> 
> @Dmitriy: this may be an issue with the range of SGIs available to the
> kernel when a secure firmware is enabled, which may be restricted to
> SGI[0-7].
> 
> For the rescheduling IPI on ARM64, the interrupt pipeline attempts to
> trigger SGI8 which may be reserved by the ATF in secure mode, therefore
> may never be received on the remote end.
> 
> Fixing this will require some work in the interrupt pipeline, typically
> for multiplexing our IPIs on a single SGI below SGI8. As a matter of
> fact, the same issue exists on the ARM side, but since running a secure
> firmware there is uncommon for Xenomai users, this went unnoticed (at
> least not reported yet AFAIR). We need to sync up on this not to
> duplicate work.
> 

I see this on Hikey with the latest ipipe-arm64 tree as well. I can confirm the
reschedule IPI isn't being received although it is sent. Rearranging the IPIs
to move reschedule up a few spots resolves the issue, so I think this confirms
the root cause.

Philippe, are there architectures that already do this type of multiplexing, or
does this mechanism need to be designed from scratch?


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [Xenomai] Xenomai 3 Multi-core Semaphore latency
  2018-05-22  5:06   ` Dmitriy Cherkasov
@ 2018-05-22  7:29     ` Philippe Gerum
  2018-06-12 16:18     ` Jeff Melvile
  1 sibling, 0 replies; 7+ messages in thread
From: Philippe Gerum @ 2018-05-22  7:29 UTC (permalink / raw)
  To: Dmitriy Cherkasov, Singh, Raman, xenomai

On 05/22/2018 07:06 AM, Dmitriy Cherkasov wrote:
> On 05/20/2018 08:07 AM, Philippe Gerum wrote:
>> On 05/18/2018 06:24 PM, Singh, Raman wrote:
>>> Environment: ARM Cortex-A53 quad-core processor (ARM 64-bit) on a
>>> Zynq Ultrascale+ ZCU102 dev board, Xenomai 3 next branch from May 
>>> 14, 2018 (SHA1: 410a4cc1109ba4e0d05b7ece7b4a5210287e1183 ), 
>>> Cobalt configuration with POSIX skin, Linux Kernel version 4.9.24
>>>
>>> I've been having issues with semaphore latency when threads access 
>>> semaphores while executing on different cores. When both threads accessing 
>>> a semaphore execute on the same processor core, the latency between
>>> one thread posting a semaphore and another waking up after waiting on it 
>>> is fairly small. However, as soon as one of the threads is moved to a 
>>> different core, the latency between a semaphore post from one thread to a 
>>> waiting thread waking up in response starts to become large enough to 
>>> affect real time performance.  The latencies I've been seeing are on the order
>>> of 100's of milliseconds.
>>>
>>
>> Reproduced on hikey here: the rescheduling IPIs Xenomai is sending for
>> waking up threads on remote CPUs don't flow to the other end properly
>> (ipipe_send_ipi()), which explains the behavior you have been seeing.
>>
>> @Dmitriy: this may be an issue with the range of SGIs available to the
>> kernel when a secure firmware is enabled, which may be restricted to
>> SGI[0-7].
>>
>> For the rescheduling IPI on ARM64, the interrupt pipeline attempts to
>> trigger SGI8 which may be reserved by the ATF in secure mode, therefore
>> may never be received on the remote end.
>>
>> Fixing this will require some work in the interrupt pipeline, typically
>> for multiplexing our IPIs on a single SGI below SGI8. As a matter of
>> fact, the same issue exists on the ARM side, but since running a secure
>> firmware there is uncommon for Xenomai users, this went unnoticed (at
>> least not reported yet AFAIR). We need to sync up on this not to
>> duplicate work.
>>
> 
> I see this on Hikey with the latest ipipe-arm64 tree as well. I can confirm the
> reschedule IPI isn't being received although it is sent. Rearranging the IPIs
> to move reschedule up a few spots resolves the issue, so I think this confirms
> the root cause.
> 
> Philippe, are there architectures that already do this type of multiplexing, or
> does this mechanism need to be designed from scratch?
> 

ppc implements a muxed IPI scheme for platforms with interrupt
controllers not providing enough IPI channels (i.e. less than 4). This
is done in the SMP support code, which enables the feature for all ICs
that would require it (CONFIG_PPC_SMP_MUXED_IPI).

We could use a similar approach, except that we may want to multiplex
all of the regular kernel inter-processor messages (i.e.
IPI_WAKEUP..IPI_CPU_BACKTRACE) on a single IPI vector, mapping I-pipe
messages 1:1 onto the remaining IPI vectors for efficiency. That would
leave us with 1 (mux) + 3 (HRTIMER, RESCHED and CRITICAL) SGIs used.

-- 
Philippe.


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [Xenomai] Xenomai 3 Multi-core Semaphore latency
  2018-05-22  5:06   ` Dmitriy Cherkasov
  2018-05-22  7:29     ` Philippe Gerum
@ 2018-06-12 16:18     ` Jeff Melvile
  2018-06-14 13:46       ` Philippe Gerum
  1 sibling, 1 reply; 7+ messages in thread
From: Jeff Melvile @ 2018-06-12 16:18 UTC (permalink / raw)
  To: Dmitriy Cherkasov; +Cc: xenomai

Dmitriy (and Philippe),

Thanks for looking into this. I'm working with Raman.

On Tue, 22 May 2018, Dmitriy Cherkasov wrote:

> On 05/20/2018 08:07 AM, Philippe Gerum wrote:
> > On 05/18/2018 06:24 PM, Singh, Raman wrote:
> >> Environment: ARM Cortex-A53 quad-core processor (ARM 64-bit) on a
> >> Zynq Ultrascale+ ZCU102 dev board, Xenomai 3 next branch from May 
> >> 14, 2018 (SHA1: 410a4cc1109ba4e0d05b7ece7b4a5210287e1183 ), 
> >> Cobalt configuration with POSIX skin, Linux Kernel version 4.9.24
> >>
> >> I've been having issues with semaphore latency when threads access 
> >> semaphores while executing on different cores. When both threads accessing 
> >> a semaphore execute on the same processor core, the latency between
> >> one thread posting a semaphore and another waking up after waiting on it 
> >> is fairly small. However, as soon as one of the threads is moved to a 
> >> different core, the latency between a semaphore post from one thread to a 
> >> waiting thread waking up in response starts to become large enough to 
> >> affect real time performance.  The latencies I've been seeing are on the order
> >> of 100's of milliseconds.
> >>
> > 
> > Reproduced on hikey here: the rescheduling IPIs Xenomai is sending for
> > waking up threads on remote CPUs don't flow to the other end properly
> > (ipipe_send_ipi()), which explains the behavior you have been seeing.
> > 
> > @Dmitriy: this may be an issue with the range of SGIs available to the
> > kernel when a secure firmware is enabled, which may be restricted to
> > SGI[0-7].
> > 
> > For the rescheduling IPI on ARM64, the interrupt pipeline attempts to
> > trigger SGI8 which may be reserved by the ATF in secure mode, therefore
> > may never be received on the remote end.
> > 
> > Fixing this will require some work in the interrupt pipeline, typically
> > for multiplexing our IPIs on a single SGI below SGI8. As a matter of
> > fact, the same issue exists on the ARM side, but since running a secure
> > firmware there is uncommon for Xenomai users, this went unnoticed (at
> > least not reported yet AFAIR). We need to sync up on this not to
> > duplicate work.
> > 
> 
> I see this on Hikey with the latest ipipe-arm64 tree as well. I can confirm the
> reschedule IPI isn't being received although it is sent. Rearranging the IPIs
> to move reschedule up a few spots resolves the issue, so I think this confirms
> the root cause.

Short term - what is the consequence of naively rearranging the IPIs? What 
else breaks? FWIW secure firmware is not in use. Is your test patch 
something we can apply to be able to test the multi-core aspects of our 
software?

Let me know if there is anything either of us can do to help. We have 
kernel development experience but admittedly not quite at this level.

> 
> Philippe, are there architectures that already do this type of multiplexing, or
> does this mechanism need to be designed from scratch?
> 
> _______________________________________________
> Xenomai mailing list
> Xenomai@xenomai.org
> https://xenomai.org/mailman/listinfo/xenomai
> 

Thanks,
Jeff


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [Xenomai] Xenomai 3 Multi-core Semaphore latency
  2018-06-12 16:18     ` Jeff Melvile
@ 2018-06-14 13:46       ` Philippe Gerum
  2018-06-18 14:47         ` Jeff Melvile
  0 siblings, 1 reply; 7+ messages in thread
From: Philippe Gerum @ 2018-06-14 13:46 UTC (permalink / raw)
  To: Jeff Melvile, Dmitriy Cherkasov; +Cc: xenomai

On 06/12/2018 06:18 PM, Jeff Melvile wrote:
> Dmitriy (and Philippe),
> 
> Thanks for looking into this. I'm working with Raman.
> 
> On Tue, 22 May 2018, Dmitriy Cherkasov wrote:
> 
>> On 05/20/2018 08:07 AM, Philippe Gerum wrote:
>>> On 05/18/2018 06:24 PM, Singh, Raman wrote:
>>>> Environment: ARM Cortex-A53 quad-core processor (ARM 64-bit) on a
>>>> Zynq Ultrascale+ ZCU102 dev board, Xenomai 3 next branch from May 
>>>> 14, 2018 (SHA1: 410a4cc1109ba4e0d05b7ece7b4a5210287e1183 ), 
>>>> Cobalt configuration with POSIX skin, Linux Kernel version 4.9.24
>>>>
>>>> I've been having issues with semaphore latency when threads access 
>>>> semaphores while executing on different cores. When both threads accessing 
>>>> a semaphore execute on the same processor core, the latency between
>>>> one thread posting a semaphore and another waking up after waiting on it 
>>>> is fairly small. However, as soon as one of the threads is moved to a 
>>>> different core, the latency between a semaphore post from one thread to a 
>>>> waiting thread waking up in response starts to become large enough to 
>>>> affect real time performance.  The latencies I've been seeing are on the order
>>>> of 100's of milliseconds.
>>>>
>>>
>>> Reproduced on hikey here: the rescheduling IPIs Xenomai is sending for
>>> waking up threads on remote CPUs don't flow to the other end properly
>>> (ipipe_send_ipi()), which explains the behavior you have been seeing.
>>>
>>> @Dmitriy: this may be an issue with the range of SGIs available to the
>>> kernel when a secure firmware is enabled, which may be restricted to
>>> SGI[0-7].
>>>
>>> For the rescheduling IPI on ARM64, the interrupt pipeline attempts to
>>> trigger SGI8 which may be reserved by the ATF in secure mode, therefore
>>> may never be received on the remote end.
>>>
>>> Fixing this will require some work in the interrupt pipeline, typically
>>> for multiplexing our IPIs on a single SGI below SGI8. As a matter of
>>> fact, the same issue exists on the ARM side, but since running a secure
>>> firmware there is uncommon for Xenomai users, this went unnoticed (at
>>> least not reported yet AFAIR). We need to sync up on this not to
>>> duplicate work.
>>>
>>
>> I see this on Hikey with the latest ipipe-arm64 tree as well. I can confirm the
>> reschedule IPI isn't being received although it is sent. Rearranging the IPIs
>> to move reschedule up a few spots resolves the issue, so I think this confirms
>> the root cause.
> 
> Short term - what is the consequence of naively rearranging the IPIs? What 
> else breaks? FWIW secure firmware is not in use. Is your test patch 
> something we can apply to be able to test the multi-core aspects of our 
> software?
> 
> Let me know if there is anything either of us can do to help. We have 
> kernel development experience but admittedly not quite at this level.
> 

This issue may affect the ARM port in some cases as well, so I took a stab at it for ARM64 since the related code is very similar. Could you test that patch? TIA,

commit 765aa7853642b46e1c13fd1f21dfcb9d049f5bfa (HEAD -> wip/arm64-ipi-4.9)
Author: Philippe Gerum <rpm@xenomai.org>
Date:   Wed Jun 13 19:16:27 2018 +0200

    arm64/ipipe: multiplex IPIs
    
    SGI8-15 can be reserved for the exclusive use of the firmware. The
    ARM64 kernel currently uses six of them (NR_IPI), and the pipeline
    needs to define three more for conveying out-of-band events
    (i.e. reschedule, hrtimer and critical IPIs). Therefore we have to
    multiplex nine inter-processor events over eight SGIs (SGI0-7).
    
    This patch changes the IPI management in order to multiplex all
    regular (in-band) IPIs over SGI0, reserving SGI1-3 for out-of-band
    events.

diff --git a/arch/arm64/include/asm/ipipe.h b/arch/arm64/include/asm/ipipe.h
index b16f03b508d6..8e756be01906 100644
--- a/arch/arm64/include/asm/ipipe.h
+++ b/arch/arm64/include/asm/ipipe.h
@@ -32,6 +32,7 @@
 #include <linux/jump_label.h>
 #include <linux/ipipe_trace.h>
 #include <linux/ipipe_debug.h>
+#include <asm/hardirq.h>
 
 #define IPIPE_CORE_RELEASE	4
 
@@ -165,7 +166,7 @@ static inline void ipipe_unmute_pic(void)
 void __ipipe_early_core_setup(void);
 void __ipipe_hook_critical_ipi(struct ipipe_domain *ipd);
 void __ipipe_root_localtimer(unsigned int irq, void *cookie);
-void __ipipe_grab_ipi(unsigned svc, struct pt_regs *regs);
+void __ipipe_grab_ipi(unsigned int sgi, struct pt_regs *regs);
 void __ipipe_ipis_alloc(void);
 void __ipipe_ipis_request(void);
 
diff --git a/arch/arm64/include/asm/ipipe_base.h b/arch/arm64/include/asm/ipipe_base.h
index 867474e1b075..4d8beb560a2f 100644
--- a/arch/arm64/include/asm/ipipe_base.h
+++ b/arch/arm64/include/asm/ipipe_base.h
@@ -31,13 +31,15 @@
 
 #ifdef CONFIG_SMP
 
-extern unsigned __ipipe_first_ipi;
-
-#define IPIPE_CRITICAL_IPI	__ipipe_first_ipi
-#define IPIPE_HRTIMER_IPI	(IPIPE_CRITICAL_IPI + 1)
-#define IPIPE_RESCHEDULE_IPI	(IPIPE_CRITICAL_IPI + 2)
-
-#define IPIPE_LAST_IPI		IPIPE_RESCHEDULE_IPI
+/*
+ * Out-of-band IPIs are directly mapped to SGI1-3, instead of
+ * multiplexed over SGI0 like regular in-band messages.
+ */
+#define IPIPE_IPI_BASE		IPIPE_VIRQ_BASE
+#define IPIPE_OOB_IPI_NR	3
+#define IPIPE_CRITICAL_IPI	(IPIPE_IPI_BASE + NR_IPI)
+#define IPIPE_HRTIMER_IPI	(IPIPE_IPI_BASE + NR_IPI + 1)
+#define IPIPE_RESCHEDULE_IPI	(IPIPE_IPI_BASE + NR_IPI + 2)
 
 #ifdef CONFIG_IPIPE_LEGACY
 #define hard_smp_processor_id()						\
diff --git a/arch/arm64/kernel/ipipe.c b/arch/arm64/kernel/ipipe.c
index ae1d0f542a3e..57f4e951648e 100644
--- a/arch/arm64/kernel/ipipe.c
+++ b/arch/arm64/kernel/ipipe.c
@@ -219,7 +219,6 @@ void __ipipe_enable_pipeline(void)
 				  irq,
 				  (ipipe_irq_handler_t)__ipipe_do_IRQ,
 				  NULL, NULL);
-
 #ifdef CONFIG_SMP
 	__ipipe_ipis_request();
 #endif /* CONFIG_SMP */
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index dbb00c13eb6f..3fe20f7b7f36 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -75,23 +75,8 @@ enum ipi_msg_type {
 	IPI_TIMER,
 	IPI_IRQ_WORK,
 	IPI_WAKEUP,
-#ifdef CONFIG_IPIPE
-	IPI_IPIPE_FIRST,
-#endif /* CONFIG_IPIPE */
 };
 
-#ifdef CONFIG_IPIPE
-#define noipipe_irq_enter()			\
-	do {					\
-	} while(0)
-#define noipipe_irq_exit()			\
-	do {					\
-	} while(0)
-#else /* !CONFIG_IPIPE */
-#define noipipe_irq_enter()	irq_enter()
-#define noipipe_irq_exit()	irq_exit()
-#endif /* !CONFIG_IPIPE */
-
 #ifdef CONFIG_ARM64_VHE
 
 /* Whether the boot CPU is running in HYP mode or not*/
@@ -766,12 +751,6 @@ static const char *ipi_types[NR_IPI] __tracepoint_string = {
 	S(IPI_WAKEUP, "CPU wake-up interrupts"),
 };
 
-static void smp_cross_call(const struct cpumask *target, unsigned int ipinr)
-{
-	trace_ipi_raise(target, ipi_types[ipinr]);
-	__smp_cross_call(target, ipinr);
-}
-
 void show_ipi_list(struct seq_file *p, int prec)
 {
 	unsigned int cpu, i;
@@ -798,60 +777,123 @@ u64 smp_irq_stat_cpu(unsigned int cpu)
 }
 
 #ifdef CONFIG_IPIPE
-#define IPIPE_IPI_BASE	IPIPE_VIRQ_BASE
 
-unsigned __ipipe_first_ipi;
-EXPORT_SYMBOL_GPL(__ipipe_first_ipi);
+static DEFINE_PER_CPU(unsigned long, ipi_messages);
+
+#define noipipe_irq_enter()			\
+	do {					\
+	} while(0)
+#define noipipe_irq_exit()			\
+	do {					\
+	} while(0)
 
-static void  __ipipe_do_IPI(unsigned virq, void *cookie)
+static void  __ipipe_do_IPI(unsigned int virq, void *cookie)
 {
-	enum ipi_msg_type msg = virq - IPIPE_IPI_BASE;
-	handle_IPI(msg, raw_cpu_ptr(&ipipe_percpu.tick_regs));
+	unsigned int ipinr = virq - IPIPE_IPI_BASE;
+	
+	handle_IPI(ipinr, raw_cpu_ptr(&ipipe_percpu.tick_regs));
 }
 
 void __ipipe_ipis_alloc(void)
 {
-	unsigned int virq, ipi, last_ipi;
+	unsigned int virq, ipi;
+	static bool done;
 
 	/* May be called multiple times via init_stage() */
-	if (__ipipe_first_ipi)
+	if (done)
 		return;
 
-	last_ipi = NR_IPI + IPIPE_LAST_IPI;
-	for (ipi = 0; ipi <= last_ipi; ipi++) {
+	/*
+	 * We have to get virtual IRQs in the range
+	 * [ IPIPE_IPI_BASE..IPIPE_IPI_BASE + NR_IPI + IPIPE_OOB_IPI_NR - 1 ],
+	 * otherwise something is wrong (likely someone would have
+	 * allocated virqs before we do, and this would break our
+	 * fixed numbering scheme for IPIs).
+	 */
+	for (ipi = 0; ipi < NR_IPI + IPIPE_OOB_IPI_NR; ipi++) {
 		virq = ipipe_alloc_virq();
-		if (ipi == IPI_IPIPE_FIRST)
-			__ipipe_first_ipi = virq;
+		WARN_ON_ONCE(virq != IPIPE_IPI_BASE + ipi);
 	}
+
+	done = true;
 }
 
 void __ipipe_ipis_request(void)
 {
-	unsigned virq;
+	unsigned int virq;
 
-	for (virq = IPIPE_IPI_BASE; virq < __ipipe_first_ipi; virq++)
+	/*
+	 * Attach a handler to each VIRQ mapping an IPI which might be
+	 * posted by __ipipe_grab_ipi(). This handler will invoke
+	 * handle_IPI() from the root stage in turn, passing it the
+	 * corresponding IPI message number.
+	 */
+	for (virq = IPIPE_IPI_BASE;
+	     virq < IPIPE_IPI_BASE + NR_IPI + IPIPE_OOB_IPI_NR; virq++)
 		ipipe_request_irq(ipipe_root_domain,
 				  virq,
 				  (ipipe_irq_handler_t)__ipipe_do_IPI,
 				  NULL, NULL);
 }
-void ipipe_send_ipi(unsigned ipi, cpumask_t cpumask)
+
+static void smp_cross_call(const struct cpumask *target, unsigned int ipinr)
+{
+	unsigned int cpu, sgi;
+
+	if (ipinr < NR_IPI) {
+		/* regular in-band IPI (multiplexed over SGI0). */
+		trace_ipi_raise_rcuidle(target, ipi_types[ipinr]);
+		for_each_cpu(cpu, target)
+			set_bit(ipinr, &per_cpu(ipi_messages, cpu));
+		smp_mb();
+		sgi = 0;
+	} else	/* out-of-band IPI (SGI1-3). */
+		sgi = ipinr - NR_IPI + 1;
+		
+	__smp_cross_call(target, sgi);
+}
+
+void ipipe_send_ipi(unsigned int ipi, cpumask_t cpumask)
 {
-	enum ipi_msg_type msg = ipi - IPIPE_IPI_BASE;
-	smp_cross_call(&cpumask, msg);
+	unsigned int ipinr = ipi - IPIPE_IPI_BASE;
+	smp_cross_call(&cpumask, ipinr);
 }
 EXPORT_SYMBOL_GPL(ipipe_send_ipi);
 
  /* hw IRQs off */
-asmlinkage void __exception __ipipe_grab_ipi(unsigned svc, struct pt_regs *regs)
+asmlinkage void __ipipe_grab_ipi(unsigned int sgi, struct pt_regs *regs)
 {
-	int virq = IPIPE_IPI_BASE + svc;
+	unsigned int ipinr, irq;
+	unsigned long *pmsg;
 
-	__ipipe_dispatch_irq(virq, IPIPE_IRQF_NOACK);
+	if (sgi) {		/* SGI1-3, OOB messages. */
+		irq = sgi + NR_IPI - 1 + IPIPE_IPI_BASE;
+		__ipipe_dispatch_irq(irq, IPIPE_IRQF_NOACK);
+	} else {
+		/* In-band IPI (0..NR_IPI-1) multiplexed over SGI0. */
+		pmsg = raw_cpu_ptr(&ipi_messages);
+		while (*pmsg) {
+			ipinr = ffs(*pmsg) - 1;
+			clear_bit(ipinr, pmsg);
+			irq = IPIPE_IPI_BASE + ipinr;
+			__ipipe_dispatch_irq(irq, IPIPE_IRQF_NOACK);
+		}
+	}
 
 	__ipipe_exit_irq(regs);
 }
 
+#else
+
+#define noipipe_irq_enter()	irq_enter()
+#define noipipe_irq_exit()	irq_exit()
+
+static void smp_cross_call(const struct cpumask *target, unsigned int ipinr)
+{
+	trace_ipi_raise(target, ipi_types[ipinr]);
+	__smp_cross_call(target, ipinr);
+}
+
 #endif /* CONFIG_IPIPE */
 
 void arch_send_call_function_ipi_mask(const struct cpumask *mask)

-- 
Philippe.


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [Xenomai] Xenomai 3 Multi-core Semaphore latency
  2018-06-14 13:46       ` Philippe Gerum
@ 2018-06-18 14:47         ` Jeff Melvile
  0 siblings, 0 replies; 7+ messages in thread
From: Jeff Melvile @ 2018-06-18 14:47 UTC (permalink / raw)
  To: Philippe Gerum; +Cc: xenomai


Hi Philippe,

On Thu, 14 Jun 2018, Philippe Gerum wrote:

> On 06/12/2018 06:18 PM, Jeff Melvile wrote:
> > Dmitriy (and Philippe),
> > 
> > Thanks for looking into this. I'm working with Raman.
> > 
> > On Tue, 22 May 2018, Dmitriy Cherkasov wrote:
> > 
> >> On 05/20/2018 08:07 AM, Philippe Gerum wrote:
> >>> On 05/18/2018 06:24 PM, Singh, Raman wrote:
> >>>> Environment: ARM Cortex-A53 quad-core processor (ARM 64-bit) on a
> >>>> Zynq Ultrascale+ ZCU102 dev board, Xenomai 3 next branch from May 
> >>>> 14, 2018 (SHA1: 410a4cc1109ba4e0d05b7ece7b4a5210287e1183 ), 
> >>>> Cobalt configuration with POSIX skin, Linux Kernel version 4.9.24
> >>>>
> >>>> I've been having issues with semaphore latency when threads access 
> >>>> semaphores while executing on different cores. When both threads accessing 
> >>>> a semaphore execute on the same processor core, the latency between
> >>>> one thread posting a semaphore and another waking up after waiting on it 
> >>>> is fairly small. However, as soon as one of the threads is moved to a 
> >>>> different core, the latency between a semaphore post from one thread to a 
> >>>> waiting thread waking up in response starts to become large enough to 
> >>>> affect real time performance.  The latencies I've been seeing are on the order
> >>>> of 100's of milliseconds.
> >>>>
> >>>
> >>> Reproduced on hikey here: the rescheduling IPIs Xenomai is sending for
> >>> waking up threads on remote CPUs don't flow to the other end properly
> >>> (ipipe_send_ipi()), which explains the behavior you have been seeing.
> >>>
> >>> @Dmitriy: this may be an issue with the range of SGIs available to the
> >>> kernel when a secure firmware is enabled, which may be restricted to
> >>> SGI[0-7].
> >>>
> >>> For the rescheduling IPI on ARM64, the interrupt pipeline attempts to
> >>> trigger SGI8 which may be reserved by the ATF in secure mode, therefore
> >>> may never be received on the remote end.
> >>>
> >>> Fixing this will require some work in the interrupt pipeline, typically
> >>> for multiplexing our IPIs on a single SGI below SGI8. As a matter of
> >>> fact, the same issue exists on the ARM side, but since running a secure
> >>> firmware there is uncommon for Xenomai users, this went unnoticed (at
> >>> least not reported yet AFAIR). We need to sync up on this not to
> >>> duplicate work.
> >>>
> >>
> >> I see this on Hikey with the latest ipipe-arm64 tree as well. I can confirm the
> >> reschedule IPI isn't being received although it is sent. Rearranging the IPIs
> >> to move reschedule up a few spots resolves the issue, so I think this confirms
> >> the root cause.
> > 
> > Short term - what is the consequence of naively rearranging the IPIs? What 
> > else breaks? FWIW secure firmware is not in use. Is your test patch 
> > something we can apply to be able to test the multi-core aspects of our 
> > software?
> > 
> > Let me know if there is anything either of us can do to help. We have 
> > kernel development experience but admittedly not quite at this level.
> > 
> 
> This issue may affect the ARM port in some cases as well, so I took a stab at it for ARM64 since the related code is very similar. Could you test that patch? TIA,

Thanks for the patch. We ended up applying it on top of 
a kernel patched with ipipe-core-4.9.24-arm64-2.patch, manually resolving 
the conflicts (contained to smp.c IIRC). Clearly this is a little different 
than applying it on top of the ipipe HEAD and generating a fresh patch. 

The fix did resolve the high latencies we were seeing in our application 
across cores. Thanks again for the fix and let me know if you'd 
like us to do any additional testing.

Thanks,
Jeff 

> 
> commit 765aa7853642b46e1c13fd1f21dfcb9d049f5bfa (HEAD -> wip/arm64-ipi-4.9)
> Author: Philippe Gerum <rpm@xenomai.org>
> Date:   Wed Jun 13 19:16:27 2018 +0200
> 
>     arm64/ipipe: multiplex IPIs
>     
>     SGI8-15 can be reserved for the exclusive use of the firmware. The
>     ARM64 kernel currently uses six of them (NR_IPI), and the pipeline
>     needs to define three more for conveying out-of-band events
>     (i.e. reschedule, hrtimer and critical IPIs). Therefore we have to
>     multiplex nine inter-processor events over eight SGIs (SGI0-7).
>     
>     This patch changes the IPI management in order to multiplex all
>     regular (in-band) IPIs over SGI0, reserving SGI1-3 for out-of-band
>     events.
> 
> diff --git a/arch/arm64/include/asm/ipipe.h b/arch/arm64/include/asm/ipipe.h
> index b16f03b508d6..8e756be01906 100644
> --- a/arch/arm64/include/asm/ipipe.h
> +++ b/arch/arm64/include/asm/ipipe.h
> @@ -32,6 +32,7 @@
>  #include <linux/jump_label.h>
>  #include <linux/ipipe_trace.h>
>  #include <linux/ipipe_debug.h>
> +#include <asm/hardirq.h>
>  
>  #define IPIPE_CORE_RELEASE	4
>  
> @@ -165,7 +166,7 @@ static inline void ipipe_unmute_pic(void)
>  void __ipipe_early_core_setup(void);
>  void __ipipe_hook_critical_ipi(struct ipipe_domain *ipd);
>  void __ipipe_root_localtimer(unsigned int irq, void *cookie);
> -void __ipipe_grab_ipi(unsigned svc, struct pt_regs *regs);
> +void __ipipe_grab_ipi(unsigned int sgi, struct pt_regs *regs);
>  void __ipipe_ipis_alloc(void);
>  void __ipipe_ipis_request(void);
>  
> diff --git a/arch/arm64/include/asm/ipipe_base.h b/arch/arm64/include/asm/ipipe_base.h
> index 867474e1b075..4d8beb560a2f 100644
> --- a/arch/arm64/include/asm/ipipe_base.h
> +++ b/arch/arm64/include/asm/ipipe_base.h
> @@ -31,13 +31,15 @@
>  
>  #ifdef CONFIG_SMP
>  
> -extern unsigned __ipipe_first_ipi;
> -
> -#define IPIPE_CRITICAL_IPI	__ipipe_first_ipi
> -#define IPIPE_HRTIMER_IPI	(IPIPE_CRITICAL_IPI + 1)
> -#define IPIPE_RESCHEDULE_IPI	(IPIPE_CRITICAL_IPI + 2)
> -
> -#define IPIPE_LAST_IPI		IPIPE_RESCHEDULE_IPI
> +/*
> + * Out-of-band IPIs are directly mapped to SGI1-3, instead of
> + * multiplexed over SGI0 like regular in-band messages.
> + */
> +#define IPIPE_IPI_BASE		IPIPE_VIRQ_BASE
> +#define IPIPE_OOB_IPI_NR	3
> +#define IPIPE_CRITICAL_IPI	(IPIPE_IPI_BASE + NR_IPI)
> +#define IPIPE_HRTIMER_IPI	(IPIPE_IPI_BASE + NR_IPI + 1)
> +#define IPIPE_RESCHEDULE_IPI	(IPIPE_IPI_BASE + NR_IPI + 2)
>  
>  #ifdef CONFIG_IPIPE_LEGACY
>  #define hard_smp_processor_id()						\
> diff --git a/arch/arm64/kernel/ipipe.c b/arch/arm64/kernel/ipipe.c
> index ae1d0f542a3e..57f4e951648e 100644
> --- a/arch/arm64/kernel/ipipe.c
> +++ b/arch/arm64/kernel/ipipe.c
> @@ -219,7 +219,6 @@ void __ipipe_enable_pipeline(void)
>  				  irq,
>  				  (ipipe_irq_handler_t)__ipipe_do_IRQ,
>  				  NULL, NULL);
> -
>  #ifdef CONFIG_SMP
>  	__ipipe_ipis_request();
>  #endif /* CONFIG_SMP */
> diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
> index dbb00c13eb6f..3fe20f7b7f36 100644
> --- a/arch/arm64/kernel/smp.c
> +++ b/arch/arm64/kernel/smp.c
> @@ -75,23 +75,8 @@ enum ipi_msg_type {
>  	IPI_TIMER,
>  	IPI_IRQ_WORK,
>  	IPI_WAKEUP,
> -#ifdef CONFIG_IPIPE
> -	IPI_IPIPE_FIRST,
> -#endif /* CONFIG_IPIPE */
>  };
>  
> -#ifdef CONFIG_IPIPE
> -#define noipipe_irq_enter()			\
> -	do {					\
> -	} while(0)
> -#define noipipe_irq_exit()			\
> -	do {					\
> -	} while(0)
> -#else /* !CONFIG_IPIPE */
> -#define noipipe_irq_enter()	irq_enter()
> -#define noipipe_irq_exit()	irq_exit()
> -#endif /* !CONFIG_IPIPE */
> -
>  #ifdef CONFIG_ARM64_VHE
>  
>  /* Whether the boot CPU is running in HYP mode or not*/
> @@ -766,12 +751,6 @@ static const char *ipi_types[NR_IPI] __tracepoint_string = {
>  	S(IPI_WAKEUP, "CPU wake-up interrupts"),
>  };
>  
> -static void smp_cross_call(const struct cpumask *target, unsigned int ipinr)
> -{
> -	trace_ipi_raise(target, ipi_types[ipinr]);
> -	__smp_cross_call(target, ipinr);
> -}
> -
>  void show_ipi_list(struct seq_file *p, int prec)
>  {
>  	unsigned int cpu, i;
> @@ -798,60 +777,123 @@ u64 smp_irq_stat_cpu(unsigned int cpu)
>  }
>  
>  #ifdef CONFIG_IPIPE
> -#define IPIPE_IPI_BASE	IPIPE_VIRQ_BASE
>  
> -unsigned __ipipe_first_ipi;
> -EXPORT_SYMBOL_GPL(__ipipe_first_ipi);
> +static DEFINE_PER_CPU(unsigned long, ipi_messages);
> +
> +#define noipipe_irq_enter()			\
> +	do {					\
> +	} while(0)
> +#define noipipe_irq_exit()			\
> +	do {					\
> +	} while(0)
>  
> -static void  __ipipe_do_IPI(unsigned virq, void *cookie)
> +static void  __ipipe_do_IPI(unsigned int virq, void *cookie)
>  {
> -	enum ipi_msg_type msg = virq - IPIPE_IPI_BASE;
> -	handle_IPI(msg, raw_cpu_ptr(&ipipe_percpu.tick_regs));
> +	unsigned int ipinr = virq - IPIPE_IPI_BASE;
> +	
> +	handle_IPI(ipinr, raw_cpu_ptr(&ipipe_percpu.tick_regs));
>  }
>  
>  void __ipipe_ipis_alloc(void)
>  {
> -	unsigned int virq, ipi, last_ipi;
> +	unsigned int virq, ipi;
> +	static bool done;
>  
>  	/* May be called multiple times via init_stage() */
> -	if (__ipipe_first_ipi)
> +	if (done)
>  		return;
>  
> -	last_ipi = NR_IPI + IPIPE_LAST_IPI;
> -	for (ipi = 0; ipi <= last_ipi; ipi++) {
> +	/*
> +	 * We have to get virtual IRQs in the range
> +	 * [ IPIPE_IPI_BASE..IPIPE_IPI_BASE + NR_IPI + IPIPE_OOB_IPI_NR - 1 ],
> +	 * otherwise something is wrong (likely someone would have
> +	 * allocated virqs before we do, and this would break our
> +	 * fixed numbering scheme for IPIs).
> +	 */
> +	for (ipi = 0; ipi < NR_IPI + IPIPE_OOB_IPI_NR; ipi++) {
>  		virq = ipipe_alloc_virq();
> -		if (ipi == IPI_IPIPE_FIRST)
> -			__ipipe_first_ipi = virq;
> +		WARN_ON_ONCE(virq != IPIPE_IPI_BASE + ipi);
>  	}
> +
> +	done = true;
>  }
>  
>  void __ipipe_ipis_request(void)
>  {
> -	unsigned virq;
> +	unsigned int virq;
>  
> -	for (virq = IPIPE_IPI_BASE; virq < __ipipe_first_ipi; virq++)
> +	/*
> +	 * Attach a handler to each VIRQ mapping an IPI which might be
> +	 * posted by __ipipe_grab_ipi(). This handler will invoke
> +	 * handle_IPI() from the root stage in turn, passing it the
> +	 * corresponding IPI message number.
> +	 */
> +	for (virq = IPIPE_IPI_BASE;
> +	     virq < IPIPE_IPI_BASE + NR_IPI + IPIPE_OOB_IPI_NR; virq++)
>  		ipipe_request_irq(ipipe_root_domain,
>  				  virq,
>  				  (ipipe_irq_handler_t)__ipipe_do_IPI,
>  				  NULL, NULL);
>  }
> -void ipipe_send_ipi(unsigned ipi, cpumask_t cpumask)
> +
> +static void smp_cross_call(const struct cpumask *target, unsigned int ipinr)
> +{
> +	unsigned int cpu, sgi;
> +
> +	if (ipinr < NR_IPI) {
> +		/* regular in-band IPI (multiplexed over SGI0). */
> +		trace_ipi_raise_rcuidle(target, ipi_types[ipinr]);
> +		for_each_cpu(cpu, target)
> +			set_bit(ipinr, &per_cpu(ipi_messages, cpu));
> +		smp_mb();
> +		sgi = 0;
> +	} else	/* out-of-band IPI (SGI1-3). */
> +		sgi = ipinr - NR_IPI + 1;
> +		
> +	__smp_cross_call(target, sgi);
> +}
> +
> +void ipipe_send_ipi(unsigned int ipi, cpumask_t cpumask)
>  {
> -	enum ipi_msg_type msg = ipi - IPIPE_IPI_BASE;
> -	smp_cross_call(&cpumask, msg);
> +	unsigned int ipinr = ipi - IPIPE_IPI_BASE;
> +	smp_cross_call(&cpumask, ipinr);
>  }
>  EXPORT_SYMBOL_GPL(ipipe_send_ipi);
>  
>   /* hw IRQs off */
> -asmlinkage void __exception __ipipe_grab_ipi(unsigned svc, struct pt_regs *regs)
> +asmlinkage void __ipipe_grab_ipi(unsigned int sgi, struct pt_regs *regs)
>  {
> -	int virq = IPIPE_IPI_BASE + svc;
> +	unsigned int ipinr, irq;
> +	unsigned long *pmsg;
>  
> -	__ipipe_dispatch_irq(virq, IPIPE_IRQF_NOACK);
> +	if (sgi) {		/* SGI1-3, OOB messages. */
> +		irq = sgi + NR_IPI - 1 + IPIPE_IPI_BASE;
> +		__ipipe_dispatch_irq(irq, IPIPE_IRQF_NOACK);
> +	} else {
> +		/* In-band IPI (0..NR_IPI-1) multiplexed over SGI0. */
> +		pmsg = raw_cpu_ptr(&ipi_messages);
> +		while (*pmsg) {
> +			ipinr = ffs(*pmsg) - 1;
> +			clear_bit(ipinr, pmsg);
> +			irq = IPIPE_IPI_BASE + ipinr;
> +			__ipipe_dispatch_irq(irq, IPIPE_IRQF_NOACK);
> +		}
> +	}
>  
>  	__ipipe_exit_irq(regs);
>  }
>  
> +#else
> +
> +#define noipipe_irq_enter()	irq_enter()
> +#define noipipe_irq_exit()	irq_exit()
> +
> +static void smp_cross_call(const struct cpumask *target, unsigned int ipinr)
> +{
> +	trace_ipi_raise(target, ipi_types[ipinr]);
> +	__smp_cross_call(target, ipinr);
> +}
> +
>  #endif /* CONFIG_IPIPE */
>  
>  void arch_send_call_function_ipi_mask(const struct cpumask *mask)
> 
> -- 
> Philippe.
> 
> 


^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2018-06-18 14:47 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-05-18 16:24 [Xenomai] Xenomai 3 Multi-core Semaphore latency Singh, Raman
2018-05-20 15:07 ` Philippe Gerum
2018-05-22  5:06   ` Dmitriy Cherkasov
2018-05-22  7:29     ` Philippe Gerum
2018-06-12 16:18     ` Jeff Melvile
2018-06-14 13:46       ` Philippe Gerum
2018-06-18 14:47         ` Jeff Melvile

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.