Re: [PATCH] arm/perf: Fix pmu percpu irq handling at hotplug.

From: Mark Rutland <mark.rutland@arm.com>
To: Yabin Cui <yabinc@google.com>
Cc: will.deacon@arm.com, linux-kernel@vger.kernel.org,
	linux-arm-kernel@lists.infradead.org
Subject: Re: [PATCH] arm/perf: Fix pmu percpu irq handling at hotplug.
Date: Fri, 19 Aug 2016 15:25:14 +0100	[thread overview]
Message-ID: <20160819142514.GB28422@leverpostej> (raw)
In-Reply-To: <CALJ9ZPMixfGKspzaz+vTGQZG4EaQB5Futss-aC-Z+8oztVX9Nw@mail.gmail.com>

Hi,

[adding linux-arm-kernel]

Thanks for the report and the patch.

As a general note, the patch came through as HTML with whitespace
mangled, making it impossible to apply and painful to review. In future,
please make sure patches are sent as inline plain text, as per
Documentation/SubmittingPatches.

On Thu, Aug 18, 2016 at 01:24:38PM -0700, Yabin Cui wrote:
>    If the cpu pmu is using a percpu irq:                                    
>          
>    1. When a cpu is down, we should disable pmu irq on                      
>    that cpu. Otherwise, if the cpu is still down when                        
>    the last perf event is released, the pmu irq can't                        
>    be freed. Because the irq is still enabled on the                        
>    offlined cpu. And following perf_event_open()                            
>    syscalls will fail.                                                      
>
>    2. When a cpu is up, we should enable pmu irq on                          
>    that cpu. Otherwise, profiling tools can't sample                        
>    events on the cpu before all perf events are                              
>    released, because pmu irq is disabled on that cpu.                        

It also looks like, if a CPU is taken down while events are active, a
non-percpu interrupt will get migrated to another CPU, yet we don't
retarget it if/when the CPU is brought back online. So, counting the two
issues you describe, we have at least three bugs with IRQ manipulation
around hotplug.

Instead of adding more moving parts to the IRQ manipulation logic, I'd
rather we rework it to:

* At probe time, request all the interrupts. If we can't, bail out and
  fail the probe.

* Upon hotplug in (and at probe time), configure the affinity and
  enable the relevant interrupt(s).

* Upon hotplug out, disable the relevant interrupt(s).

That way we have fewer moving parts that need to interact with each
other (e.g. we don't need to inhibit hotplug in places), and we know
early whether things will or will not work.

The {reserve,release}_hardware dance is largely a legacy thing that was
there to cater for sharing the PMU with other subsystems, and we should
be able to get rid of it.

I'm taking a look at doing the above, but I don't yet have a patch.

Thanks,
Mark.

>          
>    Signed-off-by: Yabin Cui <[1]yabinc@google.com>                          
>    ---                                                                      
>     drivers/perf/arm_pmu.c       | 45
>    +++++++++++++++++++++++++++++++++++++------- 
>     include/linux/perf/arm_pmu.h |  1 +                                      
>         
>     2 files changed, 39 insertions(+), 7 deletions(-)                        
>          
>    diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c              
>    index c494613..ca11d09 100644                                            
>    --- a/drivers/perf/arm_pmu.c                                              
>    +++ b/drivers/perf/arm_pmu.c                                              
>    @@ -354,19 +354,22 @@ static irqreturn_t armpmu_dispatch_irq(int irq, void
>    *dev)
>     static void                                                              
>     armpmu_release_hardware(struct arm_pmu *armpmu)                          
>     {                                                                        
>    +       get_online_cpus();                                                
>            armpmu->free_irq(armpmu);                                        
>    +       put_online_cpus();                                                
>     }                                                                        
>                                                                             
>     static int                                                              
>     armpmu_reserve_hardware(struct arm_pmu *armpmu)                          
>     {                                                                        
>    -       int err = armpmu->request_irq(armpmu, armpmu_dispatch_irq);      
>    -       if (err) {                                                        
>    -               armpmu_release_hardware(armpmu);                          
>    -               return err;                                              
>    -       }                                                                
>    +       int err;                                                          
>                                                                             
>    -       return 0;                                                        
>    +       get_online_cpus();                                                
>    +       err = armpmu->request_irq(armpmu, armpmu_dispatch_irq);          
>    +       if (err)                                                          
>    +               armpmu_release_hardware(armpmu);                          
>    +       put_online_cpus();                                                
>    +       return err;                                                      
>     }                                                                        
>                                                                             
>     static void                                                              
>    @@ -606,6 +609,7 @@ static void cpu_pmu_free_irq(struct arm_pmu *cpu_pmu)
>                    on_each_cpu_mask(&cpu_pmu->supported_cpus,                
>                                     cpu_pmu_disable_percpu_irq, &irq, 1);    
>                    free_percpu_irq(irq, &hw_events->percpu_pmu);            
>    +               cpu_pmu->percpu_irq_requested = false;                    
>            } else {                                                          
>                    for (i = 0; i < irqs; ++i) {                              
>                            int cpu = i;                                      
>    @@ -649,6 +653,7 @@ static int cpu_pmu_request_irq(struct arm_pmu
>    *cpu_pmu, irq_handler_t handler)
>                    on_each_cpu_mask(&cpu_pmu->supported_cpus,                
>                                     cpu_pmu_enable_percpu_irq, &irq, 1);    
>    +               cpu_pmu->percpu_irq_requested = true;                    
>            } else {                                                          
>                    for (i = 0; i < irqs; ++i) {                              
>                            int cpu = i;                                      
>    @@ -708,6 +713,31 @@ static int arm_perf_starting_cpu(unsigned int cpu)  
>                            continue;                                        
>                    if (pmu->reset)                                          
>                            pmu->reset(pmu);                                  
>    +                                                                        
>    +               if (pmu->percpu_irq_requested) {                          
>    +                       int irq = platform_get_irq(pmu->plat_device, 0);  
>    +                                                                        
>    +                       cpu_pmu_enable_percpu_irq(&irq);                  
>    +               }                                                        
>    +       }                                                                
>    +       spin_unlock(&arm_pmu_lock);                                      
>    +       return 0;                                                        
>    +}                                                                        
>    +                                                      
>    +static int arm_perf_teardown_cpu(unsigned int cpu)                      
>    +{                                                                        
>    +       struct arm_pmu *pmu;                                              
>    +                                                                        
>    +       spin_lock(&arm_pmu_lock);                                        
>    +       list_for_each_entry(pmu, &arm_pmu_list, entry) {                  
>    +                                                                        
>    +               if (!cpumask_test_cpu(cpu, &pmu->supported_cpus))        
>    +                       continue;                                        
>    +               if (pmu->percpu_irq_requested) {                          
>    +                       int irq = platform_get_irq(pmu->plat_device, 0);  
>    +                                                                        
>    +                       cpu_pmu_disable_percpu_irq(&irq);                
>    +               }                                                        
>            }                                                                
>          
>            spin_unlock(&arm_pmu_lock);                                      
>          
>            return 0;                                                        
>          
>    @@ -1070,7 +1100,8 @@ static int arm_pmu_hp_init(void)                    
>            ret = cpuhp_setup_state_nocalls(CPUHP_AP_PERF_ARM_STARTING,      
>                                            "AP_PERF_ARM_STARTING",          
>    -                                       arm_perf_starting_cpu, NULL);    
>    +                                       arm_perf_starting_cpu,            
>    +                                       arm_perf_teardown_cpu);          
>            if (ret)                                                          
>                    pr_err("CPU hotplug notifier for ARM PMU could not be
>    registered: %d\n",
>                           ret);                                              
>    diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h  
>    index e188438..188ff09 100644                                            
>    --- a/include/linux/perf/arm_pmu.h                                        
>    +++ b/include/linux/perf/arm_pmu.h                                        
>    @@ -79,6 +79,7 @@ struct pmu_hw_events {                                  
>                                                                             
>     struct arm_pmu {                                                        
>            struct pmu      pmu;                                              
>    +       bool            percpu_irq_requested;                            
>            cpumask_t       active_irqs;                                      
>            cpumask_t       supported_cpus;                                  
>            int             *irq_affinity;                                    
>    --                                                                        
>         
>    2.8.0.rc3.226.g39d4020
> 
> References
> 
>    Visible links
>    1. mailto:yabinc@google.com