linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v2] arm64: paravirt: Use RCU read locks to guard stolen_time
@ 2022-04-28 18:35 Elliot Berman
  2022-05-04  9:45 ` Will Deacon
                   ` (2 more replies)
  0 siblings, 3 replies; 6+ messages in thread
From: Elliot Berman @ 2022-04-28 18:35 UTC (permalink / raw)
  To: Juergen Gross, Srivatsa S. Bhat (VMware),
	Alexey Makhalov, Catalin Marinas, Will Deacon
  Cc: Prakruthi Deepak Heragu, virtualization, x86, linux-arm-kernel,
	linux-kernel, Murali Nalajala, stable, Elliot Berman

From: Prakruthi Deepak Heragu <quic_pheragu@quicinc.com>

During hotplug, the stolen time data structure is unmapped and memset.
There is a possibility of the timer IRQ being triggered before memset
and stolen time is getting updated as part of this timer IRQ handler. This
causes the below crash in timer handler -

  [ 3457.473139][    C5] Unable to handle kernel paging request at virtual address ffffffc03df05148
  ...
  [ 3458.154398][    C5] Call trace:
  [ 3458.157648][    C5]  para_steal_clock+0x30/0x50
  [ 3458.162319][    C5]  irqtime_account_process_tick+0x30/0x194
  [ 3458.168148][    C5]  account_process_tick+0x3c/0x280
  [ 3458.173274][    C5]  update_process_times+0x5c/0xf4
  [ 3458.178311][    C5]  tick_sched_timer+0x180/0x384
  [ 3458.183164][    C5]  __run_hrtimer+0x160/0x57c
  [ 3458.187744][    C5]  hrtimer_interrupt+0x258/0x684
  [ 3458.192698][    C5]  arch_timer_handler_virt+0x5c/0xa0
  [ 3458.198002][    C5]  handle_percpu_devid_irq+0xdc/0x414
  [ 3458.203385][    C5]  handle_domain_irq+0xa8/0x168
  [ 3458.208241][    C5]  gic_handle_irq.34493+0x54/0x244
  [ 3458.213359][    C5]  call_on_irq_stack+0x40/0x70
  [ 3458.218125][    C5]  do_interrupt_handler+0x60/0x9c
  [ 3458.223156][    C5]  el1_interrupt+0x34/0x64
  [ 3458.227560][    C5]  el1h_64_irq_handler+0x1c/0x2c
  [ 3458.232503][    C5]  el1h_64_irq+0x7c/0x80
  [ 3458.236736][    C5]  free_vmap_area_noflush+0x108/0x39c
  [ 3458.242126][    C5]  remove_vm_area+0xbc/0x118
  [ 3458.246714][    C5]  vm_remove_mappings+0x48/0x2a4
  [ 3458.251656][    C5]  __vunmap+0x154/0x278
  [ 3458.255796][    C5]  stolen_time_cpu_down_prepare+0xc0/0xd8
  [ 3458.261542][    C5]  cpuhp_invoke_callback+0x248/0xc34
  [ 3458.266842][    C5]  cpuhp_thread_fun+0x1c4/0x248
  [ 3458.271696][    C5]  smpboot_thread_fn+0x1b0/0x400
  [ 3458.276638][    C5]  kthread+0x17c/0x1e0
  [ 3458.280691][    C5]  ret_from_fork+0x10/0x20

As a fix, introduce rcu lock to update stolen time structure.

Fixes: 75df529bec91 ("arm64: paravirt: Initialize steal time when cpu is online")
Cc: stable@vger.kernel.org
Signed-off-by: Prakruthi Deepak Heragu <quic_pheragu@quicinc.com>
Signed-off-by: Elliot Berman <quic_eberman@quicinc.com>
---
Changes since v1: https://lore.kernel.org/all/20220420204417.155194-1-quic_eberman@quicinc.com/
 - Use RCU instead of disabling interrupts

 arch/arm64/kernel/paravirt.c | 24 +++++++++++++++++++-----
 1 file changed, 19 insertions(+), 5 deletions(-)

diff --git a/arch/arm64/kernel/paravirt.c b/arch/arm64/kernel/paravirt.c
index 75fed4460407..e724ea3d86f0 100644
--- a/arch/arm64/kernel/paravirt.c
+++ b/arch/arm64/kernel/paravirt.c
@@ -52,7 +52,9 @@ early_param("no-steal-acc", parse_no_stealacc);
 /* return stolen time in ns by asking the hypervisor */
 static u64 para_steal_clock(int cpu)
 {
+	struct pvclock_vcpu_stolen_time *kaddr = NULL;
 	struct pv_time_stolen_time_region *reg;
+	u64 ret = 0;
 
 	reg = per_cpu_ptr(&stolen_time_region, cpu);
 
@@ -61,28 +63,38 @@ static u64 para_steal_clock(int cpu)
 	 * online notification callback runs. Until the callback
 	 * has run we just return zero.
 	 */
-	if (!reg->kaddr)
+	rcu_read_lock();
+	kaddr = rcu_dereference(reg->kaddr);
+	if (!kaddr) {
+		rcu_read_unlock();
 		return 0;
+	}
 
-	return le64_to_cpu(READ_ONCE(reg->kaddr->stolen_time));
+	ret = le64_to_cpu(READ_ONCE(kaddr->stolen_time));
+	rcu_read_unlock();
+	return ret;
 }
 
 static int stolen_time_cpu_down_prepare(unsigned int cpu)
 {
+	struct pvclock_vcpu_stolen_time *kaddr = NULL;
 	struct pv_time_stolen_time_region *reg;
 
 	reg = this_cpu_ptr(&stolen_time_region);
 	if (!reg->kaddr)
 		return 0;
 
-	memunmap(reg->kaddr);
-	memset(reg, 0, sizeof(*reg));
+	kaddr = reg->kaddr;
+	rcu_assign_pointer(reg->kaddr, NULL);
+	synchronize_rcu();
+	memunmap(kaddr);
 
 	return 0;
 }
 
 static int stolen_time_cpu_online(unsigned int cpu)
 {
+	struct pvclock_vcpu_stolen_time *kaddr = NULL;
 	struct pv_time_stolen_time_region *reg;
 	struct arm_smccc_res res;
 
@@ -93,10 +105,12 @@ static int stolen_time_cpu_online(unsigned int cpu)
 	if (res.a0 == SMCCC_RET_NOT_SUPPORTED)
 		return -EINVAL;
 
-	reg->kaddr = memremap(res.a0,
+	kaddr = memremap(res.a0,
 			      sizeof(struct pvclock_vcpu_stolen_time),
 			      MEMREMAP_WB);
 
+	rcu_assign_pointer(reg->kaddr, kaddr);
+
 	if (!reg->kaddr) {
 		pr_warn("Failed to map stolen time data structure\n");
 		return -ENOMEM;
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH v2] arm64: paravirt: Use RCU read locks to guard stolen_time
  2022-04-28 18:35 [PATCH v2] arm64: paravirt: Use RCU read locks to guard stolen_time Elliot Berman
@ 2022-05-04  9:45 ` Will Deacon
  2022-05-04 13:38   ` Juergen Gross
  2022-05-04 13:40 ` Juergen Gross
  2022-05-05 10:49 ` Will Deacon
  2 siblings, 1 reply; 6+ messages in thread
From: Will Deacon @ 2022-05-04  9:45 UTC (permalink / raw)
  To: Elliot Berman
  Cc: Juergen Gross, Srivatsa S. Bhat (VMware),
	Alexey Makhalov, Catalin Marinas, Prakruthi Deepak Heragu,
	virtualization, x86, linux-arm-kernel, linux-kernel,
	Murali Nalajala, stable

On Thu, Apr 28, 2022 at 11:35:36AM -0700, Elliot Berman wrote:
> From: Prakruthi Deepak Heragu <quic_pheragu@quicinc.com>
> 
> During hotplug, the stolen time data structure is unmapped and memset.
> There is a possibility of the timer IRQ being triggered before memset
> and stolen time is getting updated as part of this timer IRQ handler. This
> causes the below crash in timer handler -
> 
>   [ 3457.473139][    C5] Unable to handle kernel paging request at virtual address ffffffc03df05148
>   ...
>   [ 3458.154398][    C5] Call trace:
>   [ 3458.157648][    C5]  para_steal_clock+0x30/0x50
>   [ 3458.162319][    C5]  irqtime_account_process_tick+0x30/0x194
>   [ 3458.168148][    C5]  account_process_tick+0x3c/0x280
>   [ 3458.173274][    C5]  update_process_times+0x5c/0xf4
>   [ 3458.178311][    C5]  tick_sched_timer+0x180/0x384
>   [ 3458.183164][    C5]  __run_hrtimer+0x160/0x57c
>   [ 3458.187744][    C5]  hrtimer_interrupt+0x258/0x684
>   [ 3458.192698][    C5]  arch_timer_handler_virt+0x5c/0xa0
>   [ 3458.198002][    C5]  handle_percpu_devid_irq+0xdc/0x414
>   [ 3458.203385][    C5]  handle_domain_irq+0xa8/0x168
>   [ 3458.208241][    C5]  gic_handle_irq.34493+0x54/0x244
>   [ 3458.213359][    C5]  call_on_irq_stack+0x40/0x70
>   [ 3458.218125][    C5]  do_interrupt_handler+0x60/0x9c
>   [ 3458.223156][    C5]  el1_interrupt+0x34/0x64
>   [ 3458.227560][    C5]  el1h_64_irq_handler+0x1c/0x2c
>   [ 3458.232503][    C5]  el1h_64_irq+0x7c/0x80
>   [ 3458.236736][    C5]  free_vmap_area_noflush+0x108/0x39c
>   [ 3458.242126][    C5]  remove_vm_area+0xbc/0x118
>   [ 3458.246714][    C5]  vm_remove_mappings+0x48/0x2a4
>   [ 3458.251656][    C5]  __vunmap+0x154/0x278
>   [ 3458.255796][    C5]  stolen_time_cpu_down_prepare+0xc0/0xd8
>   [ 3458.261542][    C5]  cpuhp_invoke_callback+0x248/0xc34
>   [ 3458.266842][    C5]  cpuhp_thread_fun+0x1c4/0x248
>   [ 3458.271696][    C5]  smpboot_thread_fn+0x1b0/0x400
>   [ 3458.276638][    C5]  kthread+0x17c/0x1e0
>   [ 3458.280691][    C5]  ret_from_fork+0x10/0x20
> 
> As a fix, introduce rcu lock to update stolen time structure.
> 
> Fixes: 75df529bec91 ("arm64: paravirt: Initialize steal time when cpu is online")
> Cc: stable@vger.kernel.org
> Signed-off-by: Prakruthi Deepak Heragu <quic_pheragu@quicinc.com>
> Signed-off-by: Elliot Berman <quic_eberman@quicinc.com>
> ---
> Changes since v1: https://lore.kernel.org/all/20220420204417.155194-1-quic_eberman@quicinc.com/
>  - Use RCU instead of disabling interrupts
> 
>  arch/arm64/kernel/paravirt.c | 24 +++++++++++++++++++-----
>  1 file changed, 19 insertions(+), 5 deletions(-)
> 
> diff --git a/arch/arm64/kernel/paravirt.c b/arch/arm64/kernel/paravirt.c
> index 75fed4460407..e724ea3d86f0 100644
> --- a/arch/arm64/kernel/paravirt.c
> +++ b/arch/arm64/kernel/paravirt.c
> @@ -52,7 +52,9 @@ early_param("no-steal-acc", parse_no_stealacc);
>  /* return stolen time in ns by asking the hypervisor */
>  static u64 para_steal_clock(int cpu)
>  {
> +	struct pvclock_vcpu_stolen_time *kaddr = NULL;
>  	struct pv_time_stolen_time_region *reg;
> +	u64 ret = 0;
>  
>  	reg = per_cpu_ptr(&stolen_time_region, cpu);
>  
> @@ -61,28 +63,38 @@ static u64 para_steal_clock(int cpu)
>  	 * online notification callback runs. Until the callback
>  	 * has run we just return zero.
>  	 */
> -	if (!reg->kaddr)
> +	rcu_read_lock();
> +	kaddr = rcu_dereference(reg->kaddr);
> +	if (!kaddr) {
> +		rcu_read_unlock();
>  		return 0;
> +	}
>  
> -	return le64_to_cpu(READ_ONCE(reg->kaddr->stolen_time));
> +	ret = le64_to_cpu(READ_ONCE(kaddr->stolen_time));

Is this READ_ONCE() still required now?

Will

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH v2] arm64: paravirt: Use RCU read locks to guard stolen_time
  2022-05-04  9:45 ` Will Deacon
@ 2022-05-04 13:38   ` Juergen Gross
  2022-05-04 13:50     ` Will Deacon
  0 siblings, 1 reply; 6+ messages in thread
From: Juergen Gross @ 2022-05-04 13:38 UTC (permalink / raw)
  To: Will Deacon, Elliot Berman
  Cc: Srivatsa S. Bhat (VMware),
	Alexey Makhalov, Catalin Marinas, Prakruthi Deepak Heragu,
	virtualization, x86, linux-arm-kernel, linux-kernel,
	Murali Nalajala, stable


[-- Attachment #1.1.1: Type: text/plain, Size: 4012 bytes --]

On 04.05.22 11:45, Will Deacon wrote:
> On Thu, Apr 28, 2022 at 11:35:36AM -0700, Elliot Berman wrote:
>> From: Prakruthi Deepak Heragu <quic_pheragu@quicinc.com>
>>
>> During hotplug, the stolen time data structure is unmapped and memset.
>> There is a possibility of the timer IRQ being triggered before memset
>> and stolen time is getting updated as part of this timer IRQ handler. This
>> causes the below crash in timer handler -
>>
>>    [ 3457.473139][    C5] Unable to handle kernel paging request at virtual address ffffffc03df05148
>>    ...
>>    [ 3458.154398][    C5] Call trace:
>>    [ 3458.157648][    C5]  para_steal_clock+0x30/0x50
>>    [ 3458.162319][    C5]  irqtime_account_process_tick+0x30/0x194
>>    [ 3458.168148][    C5]  account_process_tick+0x3c/0x280
>>    [ 3458.173274][    C5]  update_process_times+0x5c/0xf4
>>    [ 3458.178311][    C5]  tick_sched_timer+0x180/0x384
>>    [ 3458.183164][    C5]  __run_hrtimer+0x160/0x57c
>>    [ 3458.187744][    C5]  hrtimer_interrupt+0x258/0x684
>>    [ 3458.192698][    C5]  arch_timer_handler_virt+0x5c/0xa0
>>    [ 3458.198002][    C5]  handle_percpu_devid_irq+0xdc/0x414
>>    [ 3458.203385][    C5]  handle_domain_irq+0xa8/0x168
>>    [ 3458.208241][    C5]  gic_handle_irq.34493+0x54/0x244
>>    [ 3458.213359][    C5]  call_on_irq_stack+0x40/0x70
>>    [ 3458.218125][    C5]  do_interrupt_handler+0x60/0x9c
>>    [ 3458.223156][    C5]  el1_interrupt+0x34/0x64
>>    [ 3458.227560][    C5]  el1h_64_irq_handler+0x1c/0x2c
>>    [ 3458.232503][    C5]  el1h_64_irq+0x7c/0x80
>>    [ 3458.236736][    C5]  free_vmap_area_noflush+0x108/0x39c
>>    [ 3458.242126][    C5]  remove_vm_area+0xbc/0x118
>>    [ 3458.246714][    C5]  vm_remove_mappings+0x48/0x2a4
>>    [ 3458.251656][    C5]  __vunmap+0x154/0x278
>>    [ 3458.255796][    C5]  stolen_time_cpu_down_prepare+0xc0/0xd8
>>    [ 3458.261542][    C5]  cpuhp_invoke_callback+0x248/0xc34
>>    [ 3458.266842][    C5]  cpuhp_thread_fun+0x1c4/0x248
>>    [ 3458.271696][    C5]  smpboot_thread_fn+0x1b0/0x400
>>    [ 3458.276638][    C5]  kthread+0x17c/0x1e0
>>    [ 3458.280691][    C5]  ret_from_fork+0x10/0x20
>>
>> As a fix, introduce rcu lock to update stolen time structure.
>>
>> Fixes: 75df529bec91 ("arm64: paravirt: Initialize steal time when cpu is online")
>> Cc: stable@vger.kernel.org
>> Signed-off-by: Prakruthi Deepak Heragu <quic_pheragu@quicinc.com>
>> Signed-off-by: Elliot Berman <quic_eberman@quicinc.com>
>> ---
>> Changes since v1: https://lore.kernel.org/all/20220420204417.155194-1-quic_eberman@quicinc.com/
>>   - Use RCU instead of disabling interrupts
>>
>>   arch/arm64/kernel/paravirt.c | 24 +++++++++++++++++++-----
>>   1 file changed, 19 insertions(+), 5 deletions(-)
>>
>> diff --git a/arch/arm64/kernel/paravirt.c b/arch/arm64/kernel/paravirt.c
>> index 75fed4460407..e724ea3d86f0 100644
>> --- a/arch/arm64/kernel/paravirt.c
>> +++ b/arch/arm64/kernel/paravirt.c
>> @@ -52,7 +52,9 @@ early_param("no-steal-acc", parse_no_stealacc);
>>   /* return stolen time in ns by asking the hypervisor */
>>   static u64 para_steal_clock(int cpu)
>>   {
>> +	struct pvclock_vcpu_stolen_time *kaddr = NULL;
>>   	struct pv_time_stolen_time_region *reg;
>> +	u64 ret = 0;
>>   
>>   	reg = per_cpu_ptr(&stolen_time_region, cpu);
>>   
>> @@ -61,28 +63,38 @@ static u64 para_steal_clock(int cpu)
>>   	 * online notification callback runs. Until the callback
>>   	 * has run we just return zero.
>>   	 */
>> -	if (!reg->kaddr)
>> +	rcu_read_lock();
>> +	kaddr = rcu_dereference(reg->kaddr);
>> +	if (!kaddr) {
>> +		rcu_read_unlock();
>>   		return 0;
>> +	}
>>   
>> -	return le64_to_cpu(READ_ONCE(reg->kaddr->stolen_time));
>> +	ret = le64_to_cpu(READ_ONCE(kaddr->stolen_time));
> 
> Is this READ_ONCE() still required now?

Yes, as it might be called for another cpu than the current one.
stolen_time might just be updated, so you want to avoid load tearing.


Juergen

[-- Attachment #1.1.2: OpenPGP public key --]
[-- Type: application/pgp-keys, Size: 3149 bytes --]

[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 495 bytes --]

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH v2] arm64: paravirt: Use RCU read locks to guard stolen_time
  2022-04-28 18:35 [PATCH v2] arm64: paravirt: Use RCU read locks to guard stolen_time Elliot Berman
  2022-05-04  9:45 ` Will Deacon
@ 2022-05-04 13:40 ` Juergen Gross
  2022-05-05 10:49 ` Will Deacon
  2 siblings, 0 replies; 6+ messages in thread
From: Juergen Gross @ 2022-05-04 13:40 UTC (permalink / raw)
  To: Elliot Berman, Srivatsa S. Bhat (VMware),
	Alexey Makhalov, Catalin Marinas, Will Deacon
  Cc: Prakruthi Deepak Heragu, virtualization, x86, linux-arm-kernel,
	linux-kernel, Murali Nalajala, stable


[-- Attachment #1.1.1: Type: text/plain, Size: 2459 bytes --]

On 28.04.22 20:35, Elliot Berman wrote:
> From: Prakruthi Deepak Heragu <quic_pheragu@quicinc.com>
> 
> During hotplug, the stolen time data structure is unmapped and memset.
> There is a possibility of the timer IRQ being triggered before memset
> and stolen time is getting updated as part of this timer IRQ handler. This
> causes the below crash in timer handler -
> 
>    [ 3457.473139][    C5] Unable to handle kernel paging request at virtual address ffffffc03df05148
>    ...
>    [ 3458.154398][    C5] Call trace:
>    [ 3458.157648][    C5]  para_steal_clock+0x30/0x50
>    [ 3458.162319][    C5]  irqtime_account_process_tick+0x30/0x194
>    [ 3458.168148][    C5]  account_process_tick+0x3c/0x280
>    [ 3458.173274][    C5]  update_process_times+0x5c/0xf4
>    [ 3458.178311][    C5]  tick_sched_timer+0x180/0x384
>    [ 3458.183164][    C5]  __run_hrtimer+0x160/0x57c
>    [ 3458.187744][    C5]  hrtimer_interrupt+0x258/0x684
>    [ 3458.192698][    C5]  arch_timer_handler_virt+0x5c/0xa0
>    [ 3458.198002][    C5]  handle_percpu_devid_irq+0xdc/0x414
>    [ 3458.203385][    C5]  handle_domain_irq+0xa8/0x168
>    [ 3458.208241][    C5]  gic_handle_irq.34493+0x54/0x244
>    [ 3458.213359][    C5]  call_on_irq_stack+0x40/0x70
>    [ 3458.218125][    C5]  do_interrupt_handler+0x60/0x9c
>    [ 3458.223156][    C5]  el1_interrupt+0x34/0x64
>    [ 3458.227560][    C5]  el1h_64_irq_handler+0x1c/0x2c
>    [ 3458.232503][    C5]  el1h_64_irq+0x7c/0x80
>    [ 3458.236736][    C5]  free_vmap_area_noflush+0x108/0x39c
>    [ 3458.242126][    C5]  remove_vm_area+0xbc/0x118
>    [ 3458.246714][    C5]  vm_remove_mappings+0x48/0x2a4
>    [ 3458.251656][    C5]  __vunmap+0x154/0x278
>    [ 3458.255796][    C5]  stolen_time_cpu_down_prepare+0xc0/0xd8
>    [ 3458.261542][    C5]  cpuhp_invoke_callback+0x248/0xc34
>    [ 3458.266842][    C5]  cpuhp_thread_fun+0x1c4/0x248
>    [ 3458.271696][    C5]  smpboot_thread_fn+0x1b0/0x400
>    [ 3458.276638][    C5]  kthread+0x17c/0x1e0
>    [ 3458.280691][    C5]  ret_from_fork+0x10/0x20
> 
> As a fix, introduce rcu lock to update stolen time structure.
> 
> Fixes: 75df529bec91 ("arm64: paravirt: Initialize steal time when cpu is online")
> Cc: stable@vger.kernel.org
> Signed-off-by: Prakruthi Deepak Heragu <quic_pheragu@quicinc.com>
> Signed-off-by: Elliot Berman <quic_eberman@quicinc.com>

Reviewed-by: Juergen Gross <jgross@suse.com>


Juergen

[-- Attachment #1.1.2: OpenPGP public key --]
[-- Type: application/pgp-keys, Size: 3149 bytes --]

[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 495 bytes --]

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH v2] arm64: paravirt: Use RCU read locks to guard stolen_time
  2022-05-04 13:38   ` Juergen Gross
@ 2022-05-04 13:50     ` Will Deacon
  0 siblings, 0 replies; 6+ messages in thread
From: Will Deacon @ 2022-05-04 13:50 UTC (permalink / raw)
  To: Juergen Gross
  Cc: Elliot Berman, Srivatsa S. Bhat (VMware),
	Alexey Makhalov, Catalin Marinas, Prakruthi Deepak Heragu,
	virtualization, x86, linux-arm-kernel, linux-kernel,
	Murali Nalajala, stable

On Wed, May 04, 2022 at 03:38:47PM +0200, Juergen Gross wrote:
> On 04.05.22 11:45, Will Deacon wrote:
> > On Thu, Apr 28, 2022 at 11:35:36AM -0700, Elliot Berman wrote:
> > > diff --git a/arch/arm64/kernel/paravirt.c b/arch/arm64/kernel/paravirt.c
> > > index 75fed4460407..e724ea3d86f0 100644
> > > --- a/arch/arm64/kernel/paravirt.c
> > > +++ b/arch/arm64/kernel/paravirt.c
> > > @@ -52,7 +52,9 @@ early_param("no-steal-acc", parse_no_stealacc);
> > >   /* return stolen time in ns by asking the hypervisor */
> > >   static u64 para_steal_clock(int cpu)
> > >   {
> > > +	struct pvclock_vcpu_stolen_time *kaddr = NULL;
> > >   	struct pv_time_stolen_time_region *reg;
> > > +	u64 ret = 0;
> > >   	reg = per_cpu_ptr(&stolen_time_region, cpu);
> > > @@ -61,28 +63,38 @@ static u64 para_steal_clock(int cpu)
> > >   	 * online notification callback runs. Until the callback
> > >   	 * has run we just return zero.
> > >   	 */
> > > -	if (!reg->kaddr)
> > > +	rcu_read_lock();
> > > +	kaddr = rcu_dereference(reg->kaddr);
> > > +	if (!kaddr) {
> > > +		rcu_read_unlock();
> > >   		return 0;
> > > +	}
> > > -	return le64_to_cpu(READ_ONCE(reg->kaddr->stolen_time));
> > > +	ret = le64_to_cpu(READ_ONCE(kaddr->stolen_time));
> > 
> > Is this READ_ONCE() still required now?
> 
> Yes, as it might be called for another cpu than the current one.
> stolen_time might just be updated, so you want to avoid load tearing.

Ah yes, thanks. The lifetime of the structure is one thing, but the
stolen time field is updated much more regularly than the kaddr pointer.

So:

Acked-by: Will Deacon <will@kernel.org>

Cheers,

Will

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH v2] arm64: paravirt: Use RCU read locks to guard stolen_time
  2022-04-28 18:35 [PATCH v2] arm64: paravirt: Use RCU read locks to guard stolen_time Elliot Berman
  2022-05-04  9:45 ` Will Deacon
  2022-05-04 13:40 ` Juergen Gross
@ 2022-05-05 10:49 ` Will Deacon
  2 siblings, 0 replies; 6+ messages in thread
From: Will Deacon @ 2022-05-05 10:49 UTC (permalink / raw)
  To: Elliot Berman
  Cc: Juergen Gross, Srivatsa S. Bhat (VMware),
	Alexey Makhalov, Catalin Marinas, Prakruthi Deepak Heragu,
	virtualization, x86, linux-arm-kernel, linux-kernel,
	Murali Nalajala, stable

Hi Elliot,

On Thu, Apr 28, 2022 at 11:35:36AM -0700, Elliot Berman wrote:
> From: Prakruthi Deepak Heragu <quic_pheragu@quicinc.com>
> 
> During hotplug, the stolen time data structure is unmapped and memset.
> There is a possibility of the timer IRQ being triggered before memset
> and stolen time is getting updated as part of this timer IRQ handler. This
> causes the below crash in timer handler -
> 
>   [ 3457.473139][    C5] Unable to handle kernel paging request at virtual address ffffffc03df05148
>   ...
>   [ 3458.154398][    C5] Call trace:
>   [ 3458.157648][    C5]  para_steal_clock+0x30/0x50
>   [ 3458.162319][    C5]  irqtime_account_process_tick+0x30/0x194
>   [ 3458.168148][    C5]  account_process_tick+0x3c/0x280
>   [ 3458.173274][    C5]  update_process_times+0x5c/0xf4
>   [ 3458.178311][    C5]  tick_sched_timer+0x180/0x384
>   [ 3458.183164][    C5]  __run_hrtimer+0x160/0x57c
>   [ 3458.187744][    C5]  hrtimer_interrupt+0x258/0x684
>   [ 3458.192698][    C5]  arch_timer_handler_virt+0x5c/0xa0
>   [ 3458.198002][    C5]  handle_percpu_devid_irq+0xdc/0x414
>   [ 3458.203385][    C5]  handle_domain_irq+0xa8/0x168
>   [ 3458.208241][    C5]  gic_handle_irq.34493+0x54/0x244
>   [ 3458.213359][    C5]  call_on_irq_stack+0x40/0x70
>   [ 3458.218125][    C5]  do_interrupt_handler+0x60/0x9c
>   [ 3458.223156][    C5]  el1_interrupt+0x34/0x64
>   [ 3458.227560][    C5]  el1h_64_irq_handler+0x1c/0x2c
>   [ 3458.232503][    C5]  el1h_64_irq+0x7c/0x80
>   [ 3458.236736][    C5]  free_vmap_area_noflush+0x108/0x39c
>   [ 3458.242126][    C5]  remove_vm_area+0xbc/0x118
>   [ 3458.246714][    C5]  vm_remove_mappings+0x48/0x2a4
>   [ 3458.251656][    C5]  __vunmap+0x154/0x278
>   [ 3458.255796][    C5]  stolen_time_cpu_down_prepare+0xc0/0xd8
>   [ 3458.261542][    C5]  cpuhp_invoke_callback+0x248/0xc34
>   [ 3458.266842][    C5]  cpuhp_thread_fun+0x1c4/0x248
>   [ 3458.271696][    C5]  smpboot_thread_fn+0x1b0/0x400
>   [ 3458.276638][    C5]  kthread+0x17c/0x1e0
>   [ 3458.280691][    C5]  ret_from_fork+0x10/0x20
> 
> As a fix, introduce rcu lock to update stolen time structure.
> 
> Fixes: 75df529bec91 ("arm64: paravirt: Initialize steal time when cpu is online")
> Cc: stable@vger.kernel.org
> Signed-off-by: Prakruthi Deepak Heragu <quic_pheragu@quicinc.com>
> Signed-off-by: Elliot Berman <quic_eberman@quicinc.com>
> ---
> Changes since v1: https://lore.kernel.org/all/20220420204417.155194-1-quic_eberman@quicinc.com/
>  - Use RCU instead of disabling interrupts
> 
>  arch/arm64/kernel/paravirt.c | 24 +++++++++++++++++++-----
>  1 file changed, 19 insertions(+), 5 deletions(-)

I applied this locally, but sparse is complaining because the 'kaddr' field
of 'struct pv_time_stolen_time_region' is missing an '__rcu' annotation:

 | arch/arm64/kernel/paravirt.c:112:9: warning: cast adds address space '__rcu' to expression [sparse]
 | arch/arm64/kernel/paravirt.c:112:9: error: incompatible types in comparison expression (different address spaces): [sparse]
 | arch/arm64/kernel/paravirt.c:112:9:    struct pvclock_vcpu_stolen_time [noderef] __rcu * [sparse]
 | arch/arm64/kernel/paravirt.c:112:9:    struct pvclock_vcpu_stolen_time * [sparse]
 | arch/arm64/kernel/paravirt.c:67:17: warning: cast adds address space '__rcu' to expression [sparse]
 | arch/arm64/kernel/paravirt.c:67:17: error: incompatible types in comparison expression (different address spaces): [sparse]
 | arch/arm64/kernel/paravirt.c:67:17:    struct pvclock_vcpu_stolen_time [noderef] __rcu * [sparse]
 | arch/arm64/kernel/paravirt.c:67:17:    struct pvclock_vcpu_stolen_time * [sparse]
 | arch/arm64/kernel/paravirt.c:88:9: warning: cast adds address space '__rcu' to expression [sparse]
 | arch/arm64/kernel/paravirt.c:88:9: error: incompatible types in comparison expression (different address spaces): [sparse]
 | arch/arm64/kernel/paravirt.c:88:9:    struct pvclock_vcpu_stolen_time [noderef] __rcu * [sparse]
 | arch/arm64/kernel/paravirt.c:88:9:    struct pvclock_vcpu_stolen_time * [sparse]

The diff below seems to make it happy again, but please can you take a
look?

Cheers,

Will

--->8

diff --git a/arch/arm64/kernel/paravirt.c b/arch/arm64/kernel/paravirt.c
index e724ea3d86f0..57c7c211f8c7 100644
--- a/arch/arm64/kernel/paravirt.c
+++ b/arch/arm64/kernel/paravirt.c
@@ -35,7 +35,7 @@ static u64 native_steal_clock(int cpu)
 DEFINE_STATIC_CALL(pv_steal_clock, native_steal_clock);
 
 struct pv_time_stolen_time_region {
-       struct pvclock_vcpu_stolen_time *kaddr;
+       struct pvclock_vcpu_stolen_time __rcu *kaddr;
 };
 
 static DEFINE_PER_CPU(struct pv_time_stolen_time_region, stolen_time_region);
@@ -84,8 +84,7 @@ static int stolen_time_cpu_down_prepare(unsigned int cpu)
        if (!reg->kaddr)
                return 0;
 
-       kaddr = reg->kaddr;
-       rcu_assign_pointer(reg->kaddr, NULL);
+       kaddr = rcu_replace_pointer(reg->kaddr, NULL, true);
        synchronize_rcu();
        memunmap(kaddr);
 
@@ -116,8 +115,8 @@ static int stolen_time_cpu_online(unsigned int cpu)
                return -ENOMEM;
        }
 
-       if (le32_to_cpu(reg->kaddr->revision) != 0 ||
-           le32_to_cpu(reg->kaddr->attributes) != 0) {
+       if (le32_to_cpu(kaddr->revision) != 0 ||
+           le32_to_cpu(kaddr->attributes) != 0) {
                pr_warn_once("Unexpected revision or attributes in stolen time data\n");
                return -ENXIO;
        }


^ permalink raw reply related	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2022-05-05 10:49 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-04-28 18:35 [PATCH v2] arm64: paravirt: Use RCU read locks to guard stolen_time Elliot Berman
2022-05-04  9:45 ` Will Deacon
2022-05-04 13:38   ` Juergen Gross
2022-05-04 13:50     ` Will Deacon
2022-05-04 13:40 ` Juergen Gross
2022-05-05 10:49 ` Will Deacon

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).