All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2] arm64: paravirt: Use RCU read locks to guard stolen_time
@ 2022-04-28 18:35 ` Elliot Berman
  0 siblings, 0 replies; 17+ messages in thread
From: Elliot Berman @ 2022-04-28 18:35 UTC (permalink / raw)
  To: Juergen Gross, Srivatsa S. Bhat (VMware),
	Alexey Makhalov, Catalin Marinas, Will Deacon
  Cc: Prakruthi Deepak Heragu, virtualization, x86, linux-arm-kernel,
	linux-kernel, Murali Nalajala, stable, Elliot Berman

From: Prakruthi Deepak Heragu <quic_pheragu@quicinc.com>

During hotplug, the stolen time data structure is unmapped and memset.
There is a possibility of the timer IRQ being triggered before memset
and stolen time is getting updated as part of this timer IRQ handler. This
causes the below crash in timer handler -

  [ 3457.473139][    C5] Unable to handle kernel paging request at virtual address ffffffc03df05148
  ...
  [ 3458.154398][    C5] Call trace:
  [ 3458.157648][    C5]  para_steal_clock+0x30/0x50
  [ 3458.162319][    C5]  irqtime_account_process_tick+0x30/0x194
  [ 3458.168148][    C5]  account_process_tick+0x3c/0x280
  [ 3458.173274][    C5]  update_process_times+0x5c/0xf4
  [ 3458.178311][    C5]  tick_sched_timer+0x180/0x384
  [ 3458.183164][    C5]  __run_hrtimer+0x160/0x57c
  [ 3458.187744][    C5]  hrtimer_interrupt+0x258/0x684
  [ 3458.192698][    C5]  arch_timer_handler_virt+0x5c/0xa0
  [ 3458.198002][    C5]  handle_percpu_devid_irq+0xdc/0x414
  [ 3458.203385][    C5]  handle_domain_irq+0xa8/0x168
  [ 3458.208241][    C5]  gic_handle_irq.34493+0x54/0x244
  [ 3458.213359][    C5]  call_on_irq_stack+0x40/0x70
  [ 3458.218125][    C5]  do_interrupt_handler+0x60/0x9c
  [ 3458.223156][    C5]  el1_interrupt+0x34/0x64
  [ 3458.227560][    C5]  el1h_64_irq_handler+0x1c/0x2c
  [ 3458.232503][    C5]  el1h_64_irq+0x7c/0x80
  [ 3458.236736][    C5]  free_vmap_area_noflush+0x108/0x39c
  [ 3458.242126][    C5]  remove_vm_area+0xbc/0x118
  [ 3458.246714][    C5]  vm_remove_mappings+0x48/0x2a4
  [ 3458.251656][    C5]  __vunmap+0x154/0x278
  [ 3458.255796][    C5]  stolen_time_cpu_down_prepare+0xc0/0xd8
  [ 3458.261542][    C5]  cpuhp_invoke_callback+0x248/0xc34
  [ 3458.266842][    C5]  cpuhp_thread_fun+0x1c4/0x248
  [ 3458.271696][    C5]  smpboot_thread_fn+0x1b0/0x400
  [ 3458.276638][    C5]  kthread+0x17c/0x1e0
  [ 3458.280691][    C5]  ret_from_fork+0x10/0x20

As a fix, introduce rcu lock to update stolen time structure.

Fixes: 75df529bec91 ("arm64: paravirt: Initialize steal time when cpu is online")
Cc: stable@vger.kernel.org
Signed-off-by: Prakruthi Deepak Heragu <quic_pheragu@quicinc.com>
Signed-off-by: Elliot Berman <quic_eberman@quicinc.com>
---
Changes since v1: https://lore.kernel.org/all/20220420204417.155194-1-quic_eberman@quicinc.com/
 - Use RCU instead of disabling interrupts

 arch/arm64/kernel/paravirt.c | 24 +++++++++++++++++++-----
 1 file changed, 19 insertions(+), 5 deletions(-)

diff --git a/arch/arm64/kernel/paravirt.c b/arch/arm64/kernel/paravirt.c
index 75fed4460407..e724ea3d86f0 100644
--- a/arch/arm64/kernel/paravirt.c
+++ b/arch/arm64/kernel/paravirt.c
@@ -52,7 +52,9 @@ early_param("no-steal-acc", parse_no_stealacc);
 /* return stolen time in ns by asking the hypervisor */
 static u64 para_steal_clock(int cpu)
 {
+	struct pvclock_vcpu_stolen_time *kaddr = NULL;
 	struct pv_time_stolen_time_region *reg;
+	u64 ret = 0;
 
 	reg = per_cpu_ptr(&stolen_time_region, cpu);
 
@@ -61,28 +63,38 @@ static u64 para_steal_clock(int cpu)
 	 * online notification callback runs. Until the callback
 	 * has run we just return zero.
 	 */
-	if (!reg->kaddr)
+	rcu_read_lock();
+	kaddr = rcu_dereference(reg->kaddr);
+	if (!kaddr) {
+		rcu_read_unlock();
 		return 0;
+	}
 
-	return le64_to_cpu(READ_ONCE(reg->kaddr->stolen_time));
+	ret = le64_to_cpu(READ_ONCE(kaddr->stolen_time));
+	rcu_read_unlock();
+	return ret;
 }
 
 static int stolen_time_cpu_down_prepare(unsigned int cpu)
 {
+	struct pvclock_vcpu_stolen_time *kaddr = NULL;
 	struct pv_time_stolen_time_region *reg;
 
 	reg = this_cpu_ptr(&stolen_time_region);
 	if (!reg->kaddr)
 		return 0;
 
-	memunmap(reg->kaddr);
-	memset(reg, 0, sizeof(*reg));
+	kaddr = reg->kaddr;
+	rcu_assign_pointer(reg->kaddr, NULL);
+	synchronize_rcu();
+	memunmap(kaddr);
 
 	return 0;
 }
 
 static int stolen_time_cpu_online(unsigned int cpu)
 {
+	struct pvclock_vcpu_stolen_time *kaddr = NULL;
 	struct pv_time_stolen_time_region *reg;
 	struct arm_smccc_res res;
 
@@ -93,10 +105,12 @@ static int stolen_time_cpu_online(unsigned int cpu)
 	if (res.a0 == SMCCC_RET_NOT_SUPPORTED)
 		return -EINVAL;
 
-	reg->kaddr = memremap(res.a0,
+	kaddr = memremap(res.a0,
 			      sizeof(struct pvclock_vcpu_stolen_time),
 			      MEMREMAP_WB);
 
+	rcu_assign_pointer(reg->kaddr, kaddr);
+
 	if (!reg->kaddr) {
 		pr_warn("Failed to map stolen time data structure\n");
 		return -ENOMEM;
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [PATCH v2] arm64: paravirt: Use RCU read locks to guard stolen_time
@ 2022-04-28 18:35 ` Elliot Berman
  0 siblings, 0 replies; 17+ messages in thread
From: Elliot Berman @ 2022-04-28 18:35 UTC (permalink / raw)
  To: Juergen Gross, Srivatsa S. Bhat (VMware),
	Alexey Makhalov, Catalin Marinas, Will Deacon
  Cc: Prakruthi Deepak Heragu, virtualization, x86, linux-arm-kernel,
	linux-kernel, Murali Nalajala, stable, Elliot Berman

From: Prakruthi Deepak Heragu <quic_pheragu@quicinc.com>

During hotplug, the stolen time data structure is unmapped and memset.
There is a possibility of the timer IRQ being triggered before memset
and stolen time is getting updated as part of this timer IRQ handler. This
causes the below crash in timer handler -

  [ 3457.473139][    C5] Unable to handle kernel paging request at virtual address ffffffc03df05148
  ...
  [ 3458.154398][    C5] Call trace:
  [ 3458.157648][    C5]  para_steal_clock+0x30/0x50
  [ 3458.162319][    C5]  irqtime_account_process_tick+0x30/0x194
  [ 3458.168148][    C5]  account_process_tick+0x3c/0x280
  [ 3458.173274][    C5]  update_process_times+0x5c/0xf4
  [ 3458.178311][    C5]  tick_sched_timer+0x180/0x384
  [ 3458.183164][    C5]  __run_hrtimer+0x160/0x57c
  [ 3458.187744][    C5]  hrtimer_interrupt+0x258/0x684
  [ 3458.192698][    C5]  arch_timer_handler_virt+0x5c/0xa0
  [ 3458.198002][    C5]  handle_percpu_devid_irq+0xdc/0x414
  [ 3458.203385][    C5]  handle_domain_irq+0xa8/0x168
  [ 3458.208241][    C5]  gic_handle_irq.34493+0x54/0x244
  [ 3458.213359][    C5]  call_on_irq_stack+0x40/0x70
  [ 3458.218125][    C5]  do_interrupt_handler+0x60/0x9c
  [ 3458.223156][    C5]  el1_interrupt+0x34/0x64
  [ 3458.227560][    C5]  el1h_64_irq_handler+0x1c/0x2c
  [ 3458.232503][    C5]  el1h_64_irq+0x7c/0x80
  [ 3458.236736][    C5]  free_vmap_area_noflush+0x108/0x39c
  [ 3458.242126][    C5]  remove_vm_area+0xbc/0x118
  [ 3458.246714][    C5]  vm_remove_mappings+0x48/0x2a4
  [ 3458.251656][    C5]  __vunmap+0x154/0x278
  [ 3458.255796][    C5]  stolen_time_cpu_down_prepare+0xc0/0xd8
  [ 3458.261542][    C5]  cpuhp_invoke_callback+0x248/0xc34
  [ 3458.266842][    C5]  cpuhp_thread_fun+0x1c4/0x248
  [ 3458.271696][    C5]  smpboot_thread_fn+0x1b0/0x400
  [ 3458.276638][    C5]  kthread+0x17c/0x1e0
  [ 3458.280691][    C5]  ret_from_fork+0x10/0x20

As a fix, introduce rcu lock to update stolen time structure.

Fixes: 75df529bec91 ("arm64: paravirt: Initialize steal time when cpu is online")
Cc: stable@vger.kernel.org
Signed-off-by: Prakruthi Deepak Heragu <quic_pheragu@quicinc.com>
Signed-off-by: Elliot Berman <quic_eberman@quicinc.com>
---
Changes since v1: https://lore.kernel.org/all/20220420204417.155194-1-quic_eberman@quicinc.com/
 - Use RCU instead of disabling interrupts

 arch/arm64/kernel/paravirt.c | 24 +++++++++++++++++++-----
 1 file changed, 19 insertions(+), 5 deletions(-)

diff --git a/arch/arm64/kernel/paravirt.c b/arch/arm64/kernel/paravirt.c
index 75fed4460407..e724ea3d86f0 100644
--- a/arch/arm64/kernel/paravirt.c
+++ b/arch/arm64/kernel/paravirt.c
@@ -52,7 +52,9 @@ early_param("no-steal-acc", parse_no_stealacc);
 /* return stolen time in ns by asking the hypervisor */
 static u64 para_steal_clock(int cpu)
 {
+	struct pvclock_vcpu_stolen_time *kaddr = NULL;
 	struct pv_time_stolen_time_region *reg;
+	u64 ret = 0;
 
 	reg = per_cpu_ptr(&stolen_time_region, cpu);
 
@@ -61,28 +63,38 @@ static u64 para_steal_clock(int cpu)
 	 * online notification callback runs. Until the callback
 	 * has run we just return zero.
 	 */
-	if (!reg->kaddr)
+	rcu_read_lock();
+	kaddr = rcu_dereference(reg->kaddr);
+	if (!kaddr) {
+		rcu_read_unlock();
 		return 0;
+	}
 
-	return le64_to_cpu(READ_ONCE(reg->kaddr->stolen_time));
+	ret = le64_to_cpu(READ_ONCE(kaddr->stolen_time));
+	rcu_read_unlock();
+	return ret;
 }
 
 static int stolen_time_cpu_down_prepare(unsigned int cpu)
 {
+	struct pvclock_vcpu_stolen_time *kaddr = NULL;
 	struct pv_time_stolen_time_region *reg;
 
 	reg = this_cpu_ptr(&stolen_time_region);
 	if (!reg->kaddr)
 		return 0;
 
-	memunmap(reg->kaddr);
-	memset(reg, 0, sizeof(*reg));
+	kaddr = reg->kaddr;
+	rcu_assign_pointer(reg->kaddr, NULL);
+	synchronize_rcu();
+	memunmap(kaddr);
 
 	return 0;
 }
 
 static int stolen_time_cpu_online(unsigned int cpu)
 {
+	struct pvclock_vcpu_stolen_time *kaddr = NULL;
 	struct pv_time_stolen_time_region *reg;
 	struct arm_smccc_res res;
 
@@ -93,10 +105,12 @@ static int stolen_time_cpu_online(unsigned int cpu)
 	if (res.a0 == SMCCC_RET_NOT_SUPPORTED)
 		return -EINVAL;
 
-	reg->kaddr = memremap(res.a0,
+	kaddr = memremap(res.a0,
 			      sizeof(struct pvclock_vcpu_stolen_time),
 			      MEMREMAP_WB);
 
+	rcu_assign_pointer(reg->kaddr, kaddr);
+
 	if (!reg->kaddr) {
 		pr_warn("Failed to map stolen time data structure\n");
 		return -ENOMEM;
-- 
2.25.1


_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* Re: [PATCH v2] arm64: paravirt: Use RCU read locks to guard stolen_time
  2022-04-28 18:35 ` Elliot Berman
  (?)
@ 2022-05-04  9:45   ` Will Deacon
  -1 siblings, 0 replies; 17+ messages in thread
From: Will Deacon @ 2022-05-04  9:45 UTC (permalink / raw)
  To: Elliot Berman
  Cc: Juergen Gross, Srivatsa S. Bhat (VMware),
	Alexey Makhalov, Catalin Marinas, Prakruthi Deepak Heragu,
	virtualization, x86, linux-arm-kernel, linux-kernel,
	Murali Nalajala, stable

On Thu, Apr 28, 2022 at 11:35:36AM -0700, Elliot Berman wrote:
> From: Prakruthi Deepak Heragu <quic_pheragu@quicinc.com>
> 
> During hotplug, the stolen time data structure is unmapped and memset.
> There is a possibility of the timer IRQ being triggered before memset
> and stolen time is getting updated as part of this timer IRQ handler. This
> causes the below crash in timer handler -
> 
>   [ 3457.473139][    C5] Unable to handle kernel paging request at virtual address ffffffc03df05148
>   ...
>   [ 3458.154398][    C5] Call trace:
>   [ 3458.157648][    C5]  para_steal_clock+0x30/0x50
>   [ 3458.162319][    C5]  irqtime_account_process_tick+0x30/0x194
>   [ 3458.168148][    C5]  account_process_tick+0x3c/0x280
>   [ 3458.173274][    C5]  update_process_times+0x5c/0xf4
>   [ 3458.178311][    C5]  tick_sched_timer+0x180/0x384
>   [ 3458.183164][    C5]  __run_hrtimer+0x160/0x57c
>   [ 3458.187744][    C5]  hrtimer_interrupt+0x258/0x684
>   [ 3458.192698][    C5]  arch_timer_handler_virt+0x5c/0xa0
>   [ 3458.198002][    C5]  handle_percpu_devid_irq+0xdc/0x414
>   [ 3458.203385][    C5]  handle_domain_irq+0xa8/0x168
>   [ 3458.208241][    C5]  gic_handle_irq.34493+0x54/0x244
>   [ 3458.213359][    C5]  call_on_irq_stack+0x40/0x70
>   [ 3458.218125][    C5]  do_interrupt_handler+0x60/0x9c
>   [ 3458.223156][    C5]  el1_interrupt+0x34/0x64
>   [ 3458.227560][    C5]  el1h_64_irq_handler+0x1c/0x2c
>   [ 3458.232503][    C5]  el1h_64_irq+0x7c/0x80
>   [ 3458.236736][    C5]  free_vmap_area_noflush+0x108/0x39c
>   [ 3458.242126][    C5]  remove_vm_area+0xbc/0x118
>   [ 3458.246714][    C5]  vm_remove_mappings+0x48/0x2a4
>   [ 3458.251656][    C5]  __vunmap+0x154/0x278
>   [ 3458.255796][    C5]  stolen_time_cpu_down_prepare+0xc0/0xd8
>   [ 3458.261542][    C5]  cpuhp_invoke_callback+0x248/0xc34
>   [ 3458.266842][    C5]  cpuhp_thread_fun+0x1c4/0x248
>   [ 3458.271696][    C5]  smpboot_thread_fn+0x1b0/0x400
>   [ 3458.276638][    C5]  kthread+0x17c/0x1e0
>   [ 3458.280691][    C5]  ret_from_fork+0x10/0x20
> 
> As a fix, introduce rcu lock to update stolen time structure.
> 
> Fixes: 75df529bec91 ("arm64: paravirt: Initialize steal time when cpu is online")
> Cc: stable@vger.kernel.org
> Signed-off-by: Prakruthi Deepak Heragu <quic_pheragu@quicinc.com>
> Signed-off-by: Elliot Berman <quic_eberman@quicinc.com>
> ---
> Changes since v1: https://lore.kernel.org/all/20220420204417.155194-1-quic_eberman@quicinc.com/
>  - Use RCU instead of disabling interrupts
> 
>  arch/arm64/kernel/paravirt.c | 24 +++++++++++++++++++-----
>  1 file changed, 19 insertions(+), 5 deletions(-)
> 
> diff --git a/arch/arm64/kernel/paravirt.c b/arch/arm64/kernel/paravirt.c
> index 75fed4460407..e724ea3d86f0 100644
> --- a/arch/arm64/kernel/paravirt.c
> +++ b/arch/arm64/kernel/paravirt.c
> @@ -52,7 +52,9 @@ early_param("no-steal-acc", parse_no_stealacc);
>  /* return stolen time in ns by asking the hypervisor */
>  static u64 para_steal_clock(int cpu)
>  {
> +	struct pvclock_vcpu_stolen_time *kaddr = NULL;
>  	struct pv_time_stolen_time_region *reg;
> +	u64 ret = 0;
>  
>  	reg = per_cpu_ptr(&stolen_time_region, cpu);
>  
> @@ -61,28 +63,38 @@ static u64 para_steal_clock(int cpu)
>  	 * online notification callback runs. Until the callback
>  	 * has run we just return zero.
>  	 */
> -	if (!reg->kaddr)
> +	rcu_read_lock();
> +	kaddr = rcu_dereference(reg->kaddr);
> +	if (!kaddr) {
> +		rcu_read_unlock();
>  		return 0;
> +	}
>  
> -	return le64_to_cpu(READ_ONCE(reg->kaddr->stolen_time));
> +	ret = le64_to_cpu(READ_ONCE(kaddr->stolen_time));

Is this READ_ONCE() still required now?

Will

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH v2] arm64: paravirt: Use RCU read locks to guard stolen_time
@ 2022-05-04  9:45   ` Will Deacon
  0 siblings, 0 replies; 17+ messages in thread
From: Will Deacon @ 2022-05-04  9:45 UTC (permalink / raw)
  To: Elliot Berman
  Cc: Juergen Gross, Murali Nalajala, Catalin Marinas, x86,
	linux-kernel, stable, virtualization, Alexey Makhalov,
	Prakruthi Deepak Heragu, linux-arm-kernel

On Thu, Apr 28, 2022 at 11:35:36AM -0700, Elliot Berman wrote:
> From: Prakruthi Deepak Heragu <quic_pheragu@quicinc.com>
> 
> During hotplug, the stolen time data structure is unmapped and memset.
> There is a possibility of the timer IRQ being triggered before memset
> and stolen time is getting updated as part of this timer IRQ handler. This
> causes the below crash in timer handler -
> 
>   [ 3457.473139][    C5] Unable to handle kernel paging request at virtual address ffffffc03df05148
>   ...
>   [ 3458.154398][    C5] Call trace:
>   [ 3458.157648][    C5]  para_steal_clock+0x30/0x50
>   [ 3458.162319][    C5]  irqtime_account_process_tick+0x30/0x194
>   [ 3458.168148][    C5]  account_process_tick+0x3c/0x280
>   [ 3458.173274][    C5]  update_process_times+0x5c/0xf4
>   [ 3458.178311][    C5]  tick_sched_timer+0x180/0x384
>   [ 3458.183164][    C5]  __run_hrtimer+0x160/0x57c
>   [ 3458.187744][    C5]  hrtimer_interrupt+0x258/0x684
>   [ 3458.192698][    C5]  arch_timer_handler_virt+0x5c/0xa0
>   [ 3458.198002][    C5]  handle_percpu_devid_irq+0xdc/0x414
>   [ 3458.203385][    C5]  handle_domain_irq+0xa8/0x168
>   [ 3458.208241][    C5]  gic_handle_irq.34493+0x54/0x244
>   [ 3458.213359][    C5]  call_on_irq_stack+0x40/0x70
>   [ 3458.218125][    C5]  do_interrupt_handler+0x60/0x9c
>   [ 3458.223156][    C5]  el1_interrupt+0x34/0x64
>   [ 3458.227560][    C5]  el1h_64_irq_handler+0x1c/0x2c
>   [ 3458.232503][    C5]  el1h_64_irq+0x7c/0x80
>   [ 3458.236736][    C5]  free_vmap_area_noflush+0x108/0x39c
>   [ 3458.242126][    C5]  remove_vm_area+0xbc/0x118
>   [ 3458.246714][    C5]  vm_remove_mappings+0x48/0x2a4
>   [ 3458.251656][    C5]  __vunmap+0x154/0x278
>   [ 3458.255796][    C5]  stolen_time_cpu_down_prepare+0xc0/0xd8
>   [ 3458.261542][    C5]  cpuhp_invoke_callback+0x248/0xc34
>   [ 3458.266842][    C5]  cpuhp_thread_fun+0x1c4/0x248
>   [ 3458.271696][    C5]  smpboot_thread_fn+0x1b0/0x400
>   [ 3458.276638][    C5]  kthread+0x17c/0x1e0
>   [ 3458.280691][    C5]  ret_from_fork+0x10/0x20
> 
> As a fix, introduce rcu lock to update stolen time structure.
> 
> Fixes: 75df529bec91 ("arm64: paravirt: Initialize steal time when cpu is online")
> Cc: stable@vger.kernel.org
> Signed-off-by: Prakruthi Deepak Heragu <quic_pheragu@quicinc.com>
> Signed-off-by: Elliot Berman <quic_eberman@quicinc.com>
> ---
> Changes since v1: https://lore.kernel.org/all/20220420204417.155194-1-quic_eberman@quicinc.com/
>  - Use RCU instead of disabling interrupts
> 
>  arch/arm64/kernel/paravirt.c | 24 +++++++++++++++++++-----
>  1 file changed, 19 insertions(+), 5 deletions(-)
> 
> diff --git a/arch/arm64/kernel/paravirt.c b/arch/arm64/kernel/paravirt.c
> index 75fed4460407..e724ea3d86f0 100644
> --- a/arch/arm64/kernel/paravirt.c
> +++ b/arch/arm64/kernel/paravirt.c
> @@ -52,7 +52,9 @@ early_param("no-steal-acc", parse_no_stealacc);
>  /* return stolen time in ns by asking the hypervisor */
>  static u64 para_steal_clock(int cpu)
>  {
> +	struct pvclock_vcpu_stolen_time *kaddr = NULL;
>  	struct pv_time_stolen_time_region *reg;
> +	u64 ret = 0;
>  
>  	reg = per_cpu_ptr(&stolen_time_region, cpu);
>  
> @@ -61,28 +63,38 @@ static u64 para_steal_clock(int cpu)
>  	 * online notification callback runs. Until the callback
>  	 * has run we just return zero.
>  	 */
> -	if (!reg->kaddr)
> +	rcu_read_lock();
> +	kaddr = rcu_dereference(reg->kaddr);
> +	if (!kaddr) {
> +		rcu_read_unlock();
>  		return 0;
> +	}
>  
> -	return le64_to_cpu(READ_ONCE(reg->kaddr->stolen_time));
> +	ret = le64_to_cpu(READ_ONCE(kaddr->stolen_time));

Is this READ_ONCE() still required now?

Will
_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH v2] arm64: paravirt: Use RCU read locks to guard stolen_time
@ 2022-05-04  9:45   ` Will Deacon
  0 siblings, 0 replies; 17+ messages in thread
From: Will Deacon @ 2022-05-04  9:45 UTC (permalink / raw)
  To: Elliot Berman
  Cc: Juergen Gross, Srivatsa S. Bhat (VMware),
	Alexey Makhalov, Catalin Marinas, Prakruthi Deepak Heragu,
	virtualization, x86, linux-arm-kernel, linux-kernel,
	Murali Nalajala, stable

On Thu, Apr 28, 2022 at 11:35:36AM -0700, Elliot Berman wrote:
> From: Prakruthi Deepak Heragu <quic_pheragu@quicinc.com>
> 
> During hotplug, the stolen time data structure is unmapped and memset.
> There is a possibility of the timer IRQ being triggered before memset
> and stolen time is getting updated as part of this timer IRQ handler. This
> causes the below crash in timer handler -
> 
>   [ 3457.473139][    C5] Unable to handle kernel paging request at virtual address ffffffc03df05148
>   ...
>   [ 3458.154398][    C5] Call trace:
>   [ 3458.157648][    C5]  para_steal_clock+0x30/0x50
>   [ 3458.162319][    C5]  irqtime_account_process_tick+0x30/0x194
>   [ 3458.168148][    C5]  account_process_tick+0x3c/0x280
>   [ 3458.173274][    C5]  update_process_times+0x5c/0xf4
>   [ 3458.178311][    C5]  tick_sched_timer+0x180/0x384
>   [ 3458.183164][    C5]  __run_hrtimer+0x160/0x57c
>   [ 3458.187744][    C5]  hrtimer_interrupt+0x258/0x684
>   [ 3458.192698][    C5]  arch_timer_handler_virt+0x5c/0xa0
>   [ 3458.198002][    C5]  handle_percpu_devid_irq+0xdc/0x414
>   [ 3458.203385][    C5]  handle_domain_irq+0xa8/0x168
>   [ 3458.208241][    C5]  gic_handle_irq.34493+0x54/0x244
>   [ 3458.213359][    C5]  call_on_irq_stack+0x40/0x70
>   [ 3458.218125][    C5]  do_interrupt_handler+0x60/0x9c
>   [ 3458.223156][    C5]  el1_interrupt+0x34/0x64
>   [ 3458.227560][    C5]  el1h_64_irq_handler+0x1c/0x2c
>   [ 3458.232503][    C5]  el1h_64_irq+0x7c/0x80
>   [ 3458.236736][    C5]  free_vmap_area_noflush+0x108/0x39c
>   [ 3458.242126][    C5]  remove_vm_area+0xbc/0x118
>   [ 3458.246714][    C5]  vm_remove_mappings+0x48/0x2a4
>   [ 3458.251656][    C5]  __vunmap+0x154/0x278
>   [ 3458.255796][    C5]  stolen_time_cpu_down_prepare+0xc0/0xd8
>   [ 3458.261542][    C5]  cpuhp_invoke_callback+0x248/0xc34
>   [ 3458.266842][    C5]  cpuhp_thread_fun+0x1c4/0x248
>   [ 3458.271696][    C5]  smpboot_thread_fn+0x1b0/0x400
>   [ 3458.276638][    C5]  kthread+0x17c/0x1e0
>   [ 3458.280691][    C5]  ret_from_fork+0x10/0x20
> 
> As a fix, introduce rcu lock to update stolen time structure.
> 
> Fixes: 75df529bec91 ("arm64: paravirt: Initialize steal time when cpu is online")
> Cc: stable@vger.kernel.org
> Signed-off-by: Prakruthi Deepak Heragu <quic_pheragu@quicinc.com>
> Signed-off-by: Elliot Berman <quic_eberman@quicinc.com>
> ---
> Changes since v1: https://lore.kernel.org/all/20220420204417.155194-1-quic_eberman@quicinc.com/
>  - Use RCU instead of disabling interrupts
> 
>  arch/arm64/kernel/paravirt.c | 24 +++++++++++++++++++-----
>  1 file changed, 19 insertions(+), 5 deletions(-)
> 
> diff --git a/arch/arm64/kernel/paravirt.c b/arch/arm64/kernel/paravirt.c
> index 75fed4460407..e724ea3d86f0 100644
> --- a/arch/arm64/kernel/paravirt.c
> +++ b/arch/arm64/kernel/paravirt.c
> @@ -52,7 +52,9 @@ early_param("no-steal-acc", parse_no_stealacc);
>  /* return stolen time in ns by asking the hypervisor */
>  static u64 para_steal_clock(int cpu)
>  {
> +	struct pvclock_vcpu_stolen_time *kaddr = NULL;
>  	struct pv_time_stolen_time_region *reg;
> +	u64 ret = 0;
>  
>  	reg = per_cpu_ptr(&stolen_time_region, cpu);
>  
> @@ -61,28 +63,38 @@ static u64 para_steal_clock(int cpu)
>  	 * online notification callback runs. Until the callback
>  	 * has run we just return zero.
>  	 */
> -	if (!reg->kaddr)
> +	rcu_read_lock();
> +	kaddr = rcu_dereference(reg->kaddr);
> +	if (!kaddr) {
> +		rcu_read_unlock();
>  		return 0;
> +	}
>  
> -	return le64_to_cpu(READ_ONCE(reg->kaddr->stolen_time));
> +	ret = le64_to_cpu(READ_ONCE(kaddr->stolen_time));

Is this READ_ONCE() still required now?

Will

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH v2] arm64: paravirt: Use RCU read locks to guard stolen_time
  2022-05-04  9:45   ` Will Deacon
  (?)
@ 2022-05-04 13:38     ` Juergen Gross via Virtualization
  -1 siblings, 0 replies; 17+ messages in thread
From: Juergen Gross @ 2022-05-04 13:38 UTC (permalink / raw)
  To: Will Deacon, Elliot Berman
  Cc: Srivatsa S. Bhat (VMware),
	Alexey Makhalov, Catalin Marinas, Prakruthi Deepak Heragu,
	virtualization, x86, linux-arm-kernel, linux-kernel,
	Murali Nalajala, stable


[-- Attachment #1.1.1: Type: text/plain, Size: 4012 bytes --]

On 04.05.22 11:45, Will Deacon wrote:
> On Thu, Apr 28, 2022 at 11:35:36AM -0700, Elliot Berman wrote:
>> From: Prakruthi Deepak Heragu <quic_pheragu@quicinc.com>
>>
>> During hotplug, the stolen time data structure is unmapped and memset.
>> There is a possibility of the timer IRQ being triggered before memset
>> and stolen time is getting updated as part of this timer IRQ handler. This
>> causes the below crash in timer handler -
>>
>>    [ 3457.473139][    C5] Unable to handle kernel paging request at virtual address ffffffc03df05148
>>    ...
>>    [ 3458.154398][    C5] Call trace:
>>    [ 3458.157648][    C5]  para_steal_clock+0x30/0x50
>>    [ 3458.162319][    C5]  irqtime_account_process_tick+0x30/0x194
>>    [ 3458.168148][    C5]  account_process_tick+0x3c/0x280
>>    [ 3458.173274][    C5]  update_process_times+0x5c/0xf4
>>    [ 3458.178311][    C5]  tick_sched_timer+0x180/0x384
>>    [ 3458.183164][    C5]  __run_hrtimer+0x160/0x57c
>>    [ 3458.187744][    C5]  hrtimer_interrupt+0x258/0x684
>>    [ 3458.192698][    C5]  arch_timer_handler_virt+0x5c/0xa0
>>    [ 3458.198002][    C5]  handle_percpu_devid_irq+0xdc/0x414
>>    [ 3458.203385][    C5]  handle_domain_irq+0xa8/0x168
>>    [ 3458.208241][    C5]  gic_handle_irq.34493+0x54/0x244
>>    [ 3458.213359][    C5]  call_on_irq_stack+0x40/0x70
>>    [ 3458.218125][    C5]  do_interrupt_handler+0x60/0x9c
>>    [ 3458.223156][    C5]  el1_interrupt+0x34/0x64
>>    [ 3458.227560][    C5]  el1h_64_irq_handler+0x1c/0x2c
>>    [ 3458.232503][    C5]  el1h_64_irq+0x7c/0x80
>>    [ 3458.236736][    C5]  free_vmap_area_noflush+0x108/0x39c
>>    [ 3458.242126][    C5]  remove_vm_area+0xbc/0x118
>>    [ 3458.246714][    C5]  vm_remove_mappings+0x48/0x2a4
>>    [ 3458.251656][    C5]  __vunmap+0x154/0x278
>>    [ 3458.255796][    C5]  stolen_time_cpu_down_prepare+0xc0/0xd8
>>    [ 3458.261542][    C5]  cpuhp_invoke_callback+0x248/0xc34
>>    [ 3458.266842][    C5]  cpuhp_thread_fun+0x1c4/0x248
>>    [ 3458.271696][    C5]  smpboot_thread_fn+0x1b0/0x400
>>    [ 3458.276638][    C5]  kthread+0x17c/0x1e0
>>    [ 3458.280691][    C5]  ret_from_fork+0x10/0x20
>>
>> As a fix, introduce rcu lock to update stolen time structure.
>>
>> Fixes: 75df529bec91 ("arm64: paravirt: Initialize steal time when cpu is online")
>> Cc: stable@vger.kernel.org
>> Signed-off-by: Prakruthi Deepak Heragu <quic_pheragu@quicinc.com>
>> Signed-off-by: Elliot Berman <quic_eberman@quicinc.com>
>> ---
>> Changes since v1: https://lore.kernel.org/all/20220420204417.155194-1-quic_eberman@quicinc.com/
>>   - Use RCU instead of disabling interrupts
>>
>>   arch/arm64/kernel/paravirt.c | 24 +++++++++++++++++++-----
>>   1 file changed, 19 insertions(+), 5 deletions(-)
>>
>> diff --git a/arch/arm64/kernel/paravirt.c b/arch/arm64/kernel/paravirt.c
>> index 75fed4460407..e724ea3d86f0 100644
>> --- a/arch/arm64/kernel/paravirt.c
>> +++ b/arch/arm64/kernel/paravirt.c
>> @@ -52,7 +52,9 @@ early_param("no-steal-acc", parse_no_stealacc);
>>   /* return stolen time in ns by asking the hypervisor */
>>   static u64 para_steal_clock(int cpu)
>>   {
>> +	struct pvclock_vcpu_stolen_time *kaddr = NULL;
>>   	struct pv_time_stolen_time_region *reg;
>> +	u64 ret = 0;
>>   
>>   	reg = per_cpu_ptr(&stolen_time_region, cpu);
>>   
>> @@ -61,28 +63,38 @@ static u64 para_steal_clock(int cpu)
>>   	 * online notification callback runs. Until the callback
>>   	 * has run we just return zero.
>>   	 */
>> -	if (!reg->kaddr)
>> +	rcu_read_lock();
>> +	kaddr = rcu_dereference(reg->kaddr);
>> +	if (!kaddr) {
>> +		rcu_read_unlock();
>>   		return 0;
>> +	}
>>   
>> -	return le64_to_cpu(READ_ONCE(reg->kaddr->stolen_time));
>> +	ret = le64_to_cpu(READ_ONCE(kaddr->stolen_time));
> 
> Is this READ_ONCE() still required now?

Yes, as it might be called for another cpu than the current one.
stolen_time might just be updated, so you want to avoid load tearing.


Juergen

[-- Attachment #1.1.2: OpenPGP public key --]
[-- Type: application/pgp-keys, Size: 3149 bytes --]

[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 495 bytes --]

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH v2] arm64: paravirt: Use RCU read locks to guard stolen_time
@ 2022-05-04 13:38     ` Juergen Gross via Virtualization
  0 siblings, 0 replies; 17+ messages in thread
From: Juergen Gross via Virtualization @ 2022-05-04 13:38 UTC (permalink / raw)
  To: Will Deacon, Elliot Berman
  Cc: Murali Nalajala, Catalin Marinas, x86, linux-kernel, stable,
	virtualization, Alexey Makhalov, Prakruthi Deepak Heragu,
	linux-arm-kernel


[-- Attachment #1.1.1.1: Type: text/plain, Size: 4012 bytes --]

On 04.05.22 11:45, Will Deacon wrote:
> On Thu, Apr 28, 2022 at 11:35:36AM -0700, Elliot Berman wrote:
>> From: Prakruthi Deepak Heragu <quic_pheragu@quicinc.com>
>>
>> During hotplug, the stolen time data structure is unmapped and memset.
>> There is a possibility of the timer IRQ being triggered before memset
>> and stolen time is getting updated as part of this timer IRQ handler. This
>> causes the below crash in timer handler -
>>
>>    [ 3457.473139][    C5] Unable to handle kernel paging request at virtual address ffffffc03df05148
>>    ...
>>    [ 3458.154398][    C5] Call trace:
>>    [ 3458.157648][    C5]  para_steal_clock+0x30/0x50
>>    [ 3458.162319][    C5]  irqtime_account_process_tick+0x30/0x194
>>    [ 3458.168148][    C5]  account_process_tick+0x3c/0x280
>>    [ 3458.173274][    C5]  update_process_times+0x5c/0xf4
>>    [ 3458.178311][    C5]  tick_sched_timer+0x180/0x384
>>    [ 3458.183164][    C5]  __run_hrtimer+0x160/0x57c
>>    [ 3458.187744][    C5]  hrtimer_interrupt+0x258/0x684
>>    [ 3458.192698][    C5]  arch_timer_handler_virt+0x5c/0xa0
>>    [ 3458.198002][    C5]  handle_percpu_devid_irq+0xdc/0x414
>>    [ 3458.203385][    C5]  handle_domain_irq+0xa8/0x168
>>    [ 3458.208241][    C5]  gic_handle_irq.34493+0x54/0x244
>>    [ 3458.213359][    C5]  call_on_irq_stack+0x40/0x70
>>    [ 3458.218125][    C5]  do_interrupt_handler+0x60/0x9c
>>    [ 3458.223156][    C5]  el1_interrupt+0x34/0x64
>>    [ 3458.227560][    C5]  el1h_64_irq_handler+0x1c/0x2c
>>    [ 3458.232503][    C5]  el1h_64_irq+0x7c/0x80
>>    [ 3458.236736][    C5]  free_vmap_area_noflush+0x108/0x39c
>>    [ 3458.242126][    C5]  remove_vm_area+0xbc/0x118
>>    [ 3458.246714][    C5]  vm_remove_mappings+0x48/0x2a4
>>    [ 3458.251656][    C5]  __vunmap+0x154/0x278
>>    [ 3458.255796][    C5]  stolen_time_cpu_down_prepare+0xc0/0xd8
>>    [ 3458.261542][    C5]  cpuhp_invoke_callback+0x248/0xc34
>>    [ 3458.266842][    C5]  cpuhp_thread_fun+0x1c4/0x248
>>    [ 3458.271696][    C5]  smpboot_thread_fn+0x1b0/0x400
>>    [ 3458.276638][    C5]  kthread+0x17c/0x1e0
>>    [ 3458.280691][    C5]  ret_from_fork+0x10/0x20
>>
>> As a fix, introduce rcu lock to update stolen time structure.
>>
>> Fixes: 75df529bec91 ("arm64: paravirt: Initialize steal time when cpu is online")
>> Cc: stable@vger.kernel.org
>> Signed-off-by: Prakruthi Deepak Heragu <quic_pheragu@quicinc.com>
>> Signed-off-by: Elliot Berman <quic_eberman@quicinc.com>
>> ---
>> Changes since v1: https://lore.kernel.org/all/20220420204417.155194-1-quic_eberman@quicinc.com/
>>   - Use RCU instead of disabling interrupts
>>
>>   arch/arm64/kernel/paravirt.c | 24 +++++++++++++++++++-----
>>   1 file changed, 19 insertions(+), 5 deletions(-)
>>
>> diff --git a/arch/arm64/kernel/paravirt.c b/arch/arm64/kernel/paravirt.c
>> index 75fed4460407..e724ea3d86f0 100644
>> --- a/arch/arm64/kernel/paravirt.c
>> +++ b/arch/arm64/kernel/paravirt.c
>> @@ -52,7 +52,9 @@ early_param("no-steal-acc", parse_no_stealacc);
>>   /* return stolen time in ns by asking the hypervisor */
>>   static u64 para_steal_clock(int cpu)
>>   {
>> +	struct pvclock_vcpu_stolen_time *kaddr = NULL;
>>   	struct pv_time_stolen_time_region *reg;
>> +	u64 ret = 0;
>>   
>>   	reg = per_cpu_ptr(&stolen_time_region, cpu);
>>   
>> @@ -61,28 +63,38 @@ static u64 para_steal_clock(int cpu)
>>   	 * online notification callback runs. Until the callback
>>   	 * has run we just return zero.
>>   	 */
>> -	if (!reg->kaddr)
>> +	rcu_read_lock();
>> +	kaddr = rcu_dereference(reg->kaddr);
>> +	if (!kaddr) {
>> +		rcu_read_unlock();
>>   		return 0;
>> +	}
>>   
>> -	return le64_to_cpu(READ_ONCE(reg->kaddr->stolen_time));
>> +	ret = le64_to_cpu(READ_ONCE(kaddr->stolen_time));
> 
> Is this READ_ONCE() still required now?

Yes, as it might be called for another cpu than the current one.
stolen_time might just be updated, so you want to avoid load tearing.


Juergen

[-- Attachment #1.1.1.2: OpenPGP public key --]
[-- Type: application/pgp-keys, Size: 3149 bytes --]

[-- Attachment #1.2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 495 bytes --]

[-- Attachment #2: Type: text/plain, Size: 183 bytes --]

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH v2] arm64: paravirt: Use RCU read locks to guard stolen_time
@ 2022-05-04 13:38     ` Juergen Gross via Virtualization
  0 siblings, 0 replies; 17+ messages in thread
From: Juergen Gross @ 2022-05-04 13:38 UTC (permalink / raw)
  To: Will Deacon, Elliot Berman
  Cc: Srivatsa S. Bhat (VMware),
	Alexey Makhalov, Catalin Marinas, Prakruthi Deepak Heragu,
	virtualization, x86, linux-arm-kernel, linux-kernel,
	Murali Nalajala, stable


[-- Attachment #1.1.1.1: Type: text/plain, Size: 4012 bytes --]

On 04.05.22 11:45, Will Deacon wrote:
> On Thu, Apr 28, 2022 at 11:35:36AM -0700, Elliot Berman wrote:
>> From: Prakruthi Deepak Heragu <quic_pheragu@quicinc.com>
>>
>> During hotplug, the stolen time data structure is unmapped and memset.
>> There is a possibility of the timer IRQ being triggered before memset
>> and stolen time is getting updated as part of this timer IRQ handler. This
>> causes the below crash in timer handler -
>>
>>    [ 3457.473139][    C5] Unable to handle kernel paging request at virtual address ffffffc03df05148
>>    ...
>>    [ 3458.154398][    C5] Call trace:
>>    [ 3458.157648][    C5]  para_steal_clock+0x30/0x50
>>    [ 3458.162319][    C5]  irqtime_account_process_tick+0x30/0x194
>>    [ 3458.168148][    C5]  account_process_tick+0x3c/0x280
>>    [ 3458.173274][    C5]  update_process_times+0x5c/0xf4
>>    [ 3458.178311][    C5]  tick_sched_timer+0x180/0x384
>>    [ 3458.183164][    C5]  __run_hrtimer+0x160/0x57c
>>    [ 3458.187744][    C5]  hrtimer_interrupt+0x258/0x684
>>    [ 3458.192698][    C5]  arch_timer_handler_virt+0x5c/0xa0
>>    [ 3458.198002][    C5]  handle_percpu_devid_irq+0xdc/0x414
>>    [ 3458.203385][    C5]  handle_domain_irq+0xa8/0x168
>>    [ 3458.208241][    C5]  gic_handle_irq.34493+0x54/0x244
>>    [ 3458.213359][    C5]  call_on_irq_stack+0x40/0x70
>>    [ 3458.218125][    C5]  do_interrupt_handler+0x60/0x9c
>>    [ 3458.223156][    C5]  el1_interrupt+0x34/0x64
>>    [ 3458.227560][    C5]  el1h_64_irq_handler+0x1c/0x2c
>>    [ 3458.232503][    C5]  el1h_64_irq+0x7c/0x80
>>    [ 3458.236736][    C5]  free_vmap_area_noflush+0x108/0x39c
>>    [ 3458.242126][    C5]  remove_vm_area+0xbc/0x118
>>    [ 3458.246714][    C5]  vm_remove_mappings+0x48/0x2a4
>>    [ 3458.251656][    C5]  __vunmap+0x154/0x278
>>    [ 3458.255796][    C5]  stolen_time_cpu_down_prepare+0xc0/0xd8
>>    [ 3458.261542][    C5]  cpuhp_invoke_callback+0x248/0xc34
>>    [ 3458.266842][    C5]  cpuhp_thread_fun+0x1c4/0x248
>>    [ 3458.271696][    C5]  smpboot_thread_fn+0x1b0/0x400
>>    [ 3458.276638][    C5]  kthread+0x17c/0x1e0
>>    [ 3458.280691][    C5]  ret_from_fork+0x10/0x20
>>
>> As a fix, introduce rcu lock to update stolen time structure.
>>
>> Fixes: 75df529bec91 ("arm64: paravirt: Initialize steal time when cpu is online")
>> Cc: stable@vger.kernel.org
>> Signed-off-by: Prakruthi Deepak Heragu <quic_pheragu@quicinc.com>
>> Signed-off-by: Elliot Berman <quic_eberman@quicinc.com>
>> ---
>> Changes since v1: https://lore.kernel.org/all/20220420204417.155194-1-quic_eberman@quicinc.com/
>>   - Use RCU instead of disabling interrupts
>>
>>   arch/arm64/kernel/paravirt.c | 24 +++++++++++++++++++-----
>>   1 file changed, 19 insertions(+), 5 deletions(-)
>>
>> diff --git a/arch/arm64/kernel/paravirt.c b/arch/arm64/kernel/paravirt.c
>> index 75fed4460407..e724ea3d86f0 100644
>> --- a/arch/arm64/kernel/paravirt.c
>> +++ b/arch/arm64/kernel/paravirt.c
>> @@ -52,7 +52,9 @@ early_param("no-steal-acc", parse_no_stealacc);
>>   /* return stolen time in ns by asking the hypervisor */
>>   static u64 para_steal_clock(int cpu)
>>   {
>> +	struct pvclock_vcpu_stolen_time *kaddr = NULL;
>>   	struct pv_time_stolen_time_region *reg;
>> +	u64 ret = 0;
>>   
>>   	reg = per_cpu_ptr(&stolen_time_region, cpu);
>>   
>> @@ -61,28 +63,38 @@ static u64 para_steal_clock(int cpu)
>>   	 * online notification callback runs. Until the callback
>>   	 * has run we just return zero.
>>   	 */
>> -	if (!reg->kaddr)
>> +	rcu_read_lock();
>> +	kaddr = rcu_dereference(reg->kaddr);
>> +	if (!kaddr) {
>> +		rcu_read_unlock();
>>   		return 0;
>> +	}
>>   
>> -	return le64_to_cpu(READ_ONCE(reg->kaddr->stolen_time));
>> +	ret = le64_to_cpu(READ_ONCE(kaddr->stolen_time));
> 
> Is this READ_ONCE() still required now?

Yes, as it might be called for another cpu than the current one.
stolen_time might just be updated, so you want to avoid load tearing.


Juergen

[-- Attachment #1.1.1.2: OpenPGP public key --]
[-- Type: application/pgp-keys, Size: 3149 bytes --]

[-- Attachment #1.2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 495 bytes --]

[-- Attachment #2: Type: text/plain, Size: 176 bytes --]

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH v2] arm64: paravirt: Use RCU read locks to guard stolen_time
  2022-04-28 18:35 ` Elliot Berman
  (?)
@ 2022-05-04 13:40   ` Juergen Gross via Virtualization
  -1 siblings, 0 replies; 17+ messages in thread
From: Juergen Gross @ 2022-05-04 13:40 UTC (permalink / raw)
  To: Elliot Berman, Srivatsa S. Bhat (VMware),
	Alexey Makhalov, Catalin Marinas, Will Deacon
  Cc: Prakruthi Deepak Heragu, virtualization, x86, linux-arm-kernel,
	linux-kernel, Murali Nalajala, stable


[-- Attachment #1.1.1: Type: text/plain, Size: 2459 bytes --]

On 28.04.22 20:35, Elliot Berman wrote:
> From: Prakruthi Deepak Heragu <quic_pheragu@quicinc.com>
> 
> During hotplug, the stolen time data structure is unmapped and memset.
> There is a possibility of the timer IRQ being triggered before memset
> and stolen time is getting updated as part of this timer IRQ handler. This
> causes the below crash in timer handler -
> 
>    [ 3457.473139][    C5] Unable to handle kernel paging request at virtual address ffffffc03df05148
>    ...
>    [ 3458.154398][    C5] Call trace:
>    [ 3458.157648][    C5]  para_steal_clock+0x30/0x50
>    [ 3458.162319][    C5]  irqtime_account_process_tick+0x30/0x194
>    [ 3458.168148][    C5]  account_process_tick+0x3c/0x280
>    [ 3458.173274][    C5]  update_process_times+0x5c/0xf4
>    [ 3458.178311][    C5]  tick_sched_timer+0x180/0x384
>    [ 3458.183164][    C5]  __run_hrtimer+0x160/0x57c
>    [ 3458.187744][    C5]  hrtimer_interrupt+0x258/0x684
>    [ 3458.192698][    C5]  arch_timer_handler_virt+0x5c/0xa0
>    [ 3458.198002][    C5]  handle_percpu_devid_irq+0xdc/0x414
>    [ 3458.203385][    C5]  handle_domain_irq+0xa8/0x168
>    [ 3458.208241][    C5]  gic_handle_irq.34493+0x54/0x244
>    [ 3458.213359][    C5]  call_on_irq_stack+0x40/0x70
>    [ 3458.218125][    C5]  do_interrupt_handler+0x60/0x9c
>    [ 3458.223156][    C5]  el1_interrupt+0x34/0x64
>    [ 3458.227560][    C5]  el1h_64_irq_handler+0x1c/0x2c
>    [ 3458.232503][    C5]  el1h_64_irq+0x7c/0x80
>    [ 3458.236736][    C5]  free_vmap_area_noflush+0x108/0x39c
>    [ 3458.242126][    C5]  remove_vm_area+0xbc/0x118
>    [ 3458.246714][    C5]  vm_remove_mappings+0x48/0x2a4
>    [ 3458.251656][    C5]  __vunmap+0x154/0x278
>    [ 3458.255796][    C5]  stolen_time_cpu_down_prepare+0xc0/0xd8
>    [ 3458.261542][    C5]  cpuhp_invoke_callback+0x248/0xc34
>    [ 3458.266842][    C5]  cpuhp_thread_fun+0x1c4/0x248
>    [ 3458.271696][    C5]  smpboot_thread_fn+0x1b0/0x400
>    [ 3458.276638][    C5]  kthread+0x17c/0x1e0
>    [ 3458.280691][    C5]  ret_from_fork+0x10/0x20
> 
> As a fix, introduce rcu lock to update stolen time structure.
> 
> Fixes: 75df529bec91 ("arm64: paravirt: Initialize steal time when cpu is online")
> Cc: stable@vger.kernel.org
> Signed-off-by: Prakruthi Deepak Heragu <quic_pheragu@quicinc.com>
> Signed-off-by: Elliot Berman <quic_eberman@quicinc.com>

Reviewed-by: Juergen Gross <jgross@suse.com>


Juergen

[-- Attachment #1.1.2: OpenPGP public key --]
[-- Type: application/pgp-keys, Size: 3149 bytes --]

[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 495 bytes --]

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH v2] arm64: paravirt: Use RCU read locks to guard stolen_time
@ 2022-05-04 13:40   ` Juergen Gross via Virtualization
  0 siblings, 0 replies; 17+ messages in thread
From: Juergen Gross via Virtualization @ 2022-05-04 13:40 UTC (permalink / raw)
  To: Elliot Berman, Srivatsa S. Bhat (VMware),
	Alexey Makhalov, Catalin Marinas, Will Deacon
  Cc: Murali Nalajala, x86, linux-kernel, stable, virtualization,
	Prakruthi Deepak Heragu, linux-arm-kernel


[-- Attachment #1.1.1.1: Type: text/plain, Size: 2459 bytes --]

On 28.04.22 20:35, Elliot Berman wrote:
> From: Prakruthi Deepak Heragu <quic_pheragu@quicinc.com>
> 
> During hotplug, the stolen time data structure is unmapped and memset.
> There is a possibility of the timer IRQ being triggered before memset
> and stolen time is getting updated as part of this timer IRQ handler. This
> causes the below crash in timer handler -
> 
>    [ 3457.473139][    C5] Unable to handle kernel paging request at virtual address ffffffc03df05148
>    ...
>    [ 3458.154398][    C5] Call trace:
>    [ 3458.157648][    C5]  para_steal_clock+0x30/0x50
>    [ 3458.162319][    C5]  irqtime_account_process_tick+0x30/0x194
>    [ 3458.168148][    C5]  account_process_tick+0x3c/0x280
>    [ 3458.173274][    C5]  update_process_times+0x5c/0xf4
>    [ 3458.178311][    C5]  tick_sched_timer+0x180/0x384
>    [ 3458.183164][    C5]  __run_hrtimer+0x160/0x57c
>    [ 3458.187744][    C5]  hrtimer_interrupt+0x258/0x684
>    [ 3458.192698][    C5]  arch_timer_handler_virt+0x5c/0xa0
>    [ 3458.198002][    C5]  handle_percpu_devid_irq+0xdc/0x414
>    [ 3458.203385][    C5]  handle_domain_irq+0xa8/0x168
>    [ 3458.208241][    C5]  gic_handle_irq.34493+0x54/0x244
>    [ 3458.213359][    C5]  call_on_irq_stack+0x40/0x70
>    [ 3458.218125][    C5]  do_interrupt_handler+0x60/0x9c
>    [ 3458.223156][    C5]  el1_interrupt+0x34/0x64
>    [ 3458.227560][    C5]  el1h_64_irq_handler+0x1c/0x2c
>    [ 3458.232503][    C5]  el1h_64_irq+0x7c/0x80
>    [ 3458.236736][    C5]  free_vmap_area_noflush+0x108/0x39c
>    [ 3458.242126][    C5]  remove_vm_area+0xbc/0x118
>    [ 3458.246714][    C5]  vm_remove_mappings+0x48/0x2a4
>    [ 3458.251656][    C5]  __vunmap+0x154/0x278
>    [ 3458.255796][    C5]  stolen_time_cpu_down_prepare+0xc0/0xd8
>    [ 3458.261542][    C5]  cpuhp_invoke_callback+0x248/0xc34
>    [ 3458.266842][    C5]  cpuhp_thread_fun+0x1c4/0x248
>    [ 3458.271696][    C5]  smpboot_thread_fn+0x1b0/0x400
>    [ 3458.276638][    C5]  kthread+0x17c/0x1e0
>    [ 3458.280691][    C5]  ret_from_fork+0x10/0x20
> 
> As a fix, introduce rcu lock to update stolen time structure.
> 
> Fixes: 75df529bec91 ("arm64: paravirt: Initialize steal time when cpu is online")
> Cc: stable@vger.kernel.org
> Signed-off-by: Prakruthi Deepak Heragu <quic_pheragu@quicinc.com>
> Signed-off-by: Elliot Berman <quic_eberman@quicinc.com>

Reviewed-by: Juergen Gross <jgross@suse.com>


Juergen

[-- Attachment #1.1.1.2: OpenPGP public key --]
[-- Type: application/pgp-keys, Size: 3149 bytes --]

[-- Attachment #1.2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 495 bytes --]

[-- Attachment #2: Type: text/plain, Size: 183 bytes --]

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH v2] arm64: paravirt: Use RCU read locks to guard stolen_time
@ 2022-05-04 13:40   ` Juergen Gross via Virtualization
  0 siblings, 0 replies; 17+ messages in thread
From: Juergen Gross @ 2022-05-04 13:40 UTC (permalink / raw)
  To: Elliot Berman, Srivatsa S. Bhat (VMware),
	Alexey Makhalov, Catalin Marinas, Will Deacon
  Cc: Prakruthi Deepak Heragu, virtualization, x86, linux-arm-kernel,
	linux-kernel, Murali Nalajala, stable


[-- Attachment #1.1.1.1: Type: text/plain, Size: 2459 bytes --]

On 28.04.22 20:35, Elliot Berman wrote:
> From: Prakruthi Deepak Heragu <quic_pheragu@quicinc.com>
> 
> During hotplug, the stolen time data structure is unmapped and memset.
> There is a possibility of the timer IRQ being triggered before memset
> and stolen time is getting updated as part of this timer IRQ handler. This
> causes the below crash in timer handler -
> 
>    [ 3457.473139][    C5] Unable to handle kernel paging request at virtual address ffffffc03df05148
>    ...
>    [ 3458.154398][    C5] Call trace:
>    [ 3458.157648][    C5]  para_steal_clock+0x30/0x50
>    [ 3458.162319][    C5]  irqtime_account_process_tick+0x30/0x194
>    [ 3458.168148][    C5]  account_process_tick+0x3c/0x280
>    [ 3458.173274][    C5]  update_process_times+0x5c/0xf4
>    [ 3458.178311][    C5]  tick_sched_timer+0x180/0x384
>    [ 3458.183164][    C5]  __run_hrtimer+0x160/0x57c
>    [ 3458.187744][    C5]  hrtimer_interrupt+0x258/0x684
>    [ 3458.192698][    C5]  arch_timer_handler_virt+0x5c/0xa0
>    [ 3458.198002][    C5]  handle_percpu_devid_irq+0xdc/0x414
>    [ 3458.203385][    C5]  handle_domain_irq+0xa8/0x168
>    [ 3458.208241][    C5]  gic_handle_irq.34493+0x54/0x244
>    [ 3458.213359][    C5]  call_on_irq_stack+0x40/0x70
>    [ 3458.218125][    C5]  do_interrupt_handler+0x60/0x9c
>    [ 3458.223156][    C5]  el1_interrupt+0x34/0x64
>    [ 3458.227560][    C5]  el1h_64_irq_handler+0x1c/0x2c
>    [ 3458.232503][    C5]  el1h_64_irq+0x7c/0x80
>    [ 3458.236736][    C5]  free_vmap_area_noflush+0x108/0x39c
>    [ 3458.242126][    C5]  remove_vm_area+0xbc/0x118
>    [ 3458.246714][    C5]  vm_remove_mappings+0x48/0x2a4
>    [ 3458.251656][    C5]  __vunmap+0x154/0x278
>    [ 3458.255796][    C5]  stolen_time_cpu_down_prepare+0xc0/0xd8
>    [ 3458.261542][    C5]  cpuhp_invoke_callback+0x248/0xc34
>    [ 3458.266842][    C5]  cpuhp_thread_fun+0x1c4/0x248
>    [ 3458.271696][    C5]  smpboot_thread_fn+0x1b0/0x400
>    [ 3458.276638][    C5]  kthread+0x17c/0x1e0
>    [ 3458.280691][    C5]  ret_from_fork+0x10/0x20
> 
> As a fix, introduce rcu lock to update stolen time structure.
> 
> Fixes: 75df529bec91 ("arm64: paravirt: Initialize steal time when cpu is online")
> Cc: stable@vger.kernel.org
> Signed-off-by: Prakruthi Deepak Heragu <quic_pheragu@quicinc.com>
> Signed-off-by: Elliot Berman <quic_eberman@quicinc.com>

Reviewed-by: Juergen Gross <jgross@suse.com>


Juergen

[-- Attachment #1.1.1.2: OpenPGP public key --]
[-- Type: application/pgp-keys, Size: 3149 bytes --]

[-- Attachment #1.2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 495 bytes --]

[-- Attachment #2: Type: text/plain, Size: 176 bytes --]

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH v2] arm64: paravirt: Use RCU read locks to guard stolen_time
  2022-05-04 13:38     ` Juergen Gross via Virtualization
  (?)
@ 2022-05-04 13:50       ` Will Deacon
  -1 siblings, 0 replies; 17+ messages in thread
From: Will Deacon @ 2022-05-04 13:50 UTC (permalink / raw)
  To: Juergen Gross
  Cc: Elliot Berman, Srivatsa S. Bhat (VMware),
	Alexey Makhalov, Catalin Marinas, Prakruthi Deepak Heragu,
	virtualization, x86, linux-arm-kernel, linux-kernel,
	Murali Nalajala, stable

On Wed, May 04, 2022 at 03:38:47PM +0200, Juergen Gross wrote:
> On 04.05.22 11:45, Will Deacon wrote:
> > On Thu, Apr 28, 2022 at 11:35:36AM -0700, Elliot Berman wrote:
> > > diff --git a/arch/arm64/kernel/paravirt.c b/arch/arm64/kernel/paravirt.c
> > > index 75fed4460407..e724ea3d86f0 100644
> > > --- a/arch/arm64/kernel/paravirt.c
> > > +++ b/arch/arm64/kernel/paravirt.c
> > > @@ -52,7 +52,9 @@ early_param("no-steal-acc", parse_no_stealacc);
> > >   /* return stolen time in ns by asking the hypervisor */
> > >   static u64 para_steal_clock(int cpu)
> > >   {
> > > +	struct pvclock_vcpu_stolen_time *kaddr = NULL;
> > >   	struct pv_time_stolen_time_region *reg;
> > > +	u64 ret = 0;
> > >   	reg = per_cpu_ptr(&stolen_time_region, cpu);
> > > @@ -61,28 +63,38 @@ static u64 para_steal_clock(int cpu)
> > >   	 * online notification callback runs. Until the callback
> > >   	 * has run we just return zero.
> > >   	 */
> > > -	if (!reg->kaddr)
> > > +	rcu_read_lock();
> > > +	kaddr = rcu_dereference(reg->kaddr);
> > > +	if (!kaddr) {
> > > +		rcu_read_unlock();
> > >   		return 0;
> > > +	}
> > > -	return le64_to_cpu(READ_ONCE(reg->kaddr->stolen_time));
> > > +	ret = le64_to_cpu(READ_ONCE(kaddr->stolen_time));
> > 
> > Is this READ_ONCE() still required now?
> 
> Yes, as it might be called for another cpu than the current one.
> stolen_time might just be updated, so you want to avoid load tearing.

Ah yes, thanks. The lifetime of the structure is one thing, but the
stolen time field is updated much more regularly than the kaddr pointer.

So:

Acked-by: Will Deacon <will@kernel.org>

Cheers,

Will

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH v2] arm64: paravirt: Use RCU read locks to guard stolen_time
@ 2022-05-04 13:50       ` Will Deacon
  0 siblings, 0 replies; 17+ messages in thread
From: Will Deacon @ 2022-05-04 13:50 UTC (permalink / raw)
  To: Juergen Gross
  Cc: Murali Nalajala, Elliot Berman, Catalin Marinas, x86,
	linux-kernel, stable, virtualization, Alexey Makhalov,
	Prakruthi Deepak Heragu, linux-arm-kernel

On Wed, May 04, 2022 at 03:38:47PM +0200, Juergen Gross wrote:
> On 04.05.22 11:45, Will Deacon wrote:
> > On Thu, Apr 28, 2022 at 11:35:36AM -0700, Elliot Berman wrote:
> > > diff --git a/arch/arm64/kernel/paravirt.c b/arch/arm64/kernel/paravirt.c
> > > index 75fed4460407..e724ea3d86f0 100644
> > > --- a/arch/arm64/kernel/paravirt.c
> > > +++ b/arch/arm64/kernel/paravirt.c
> > > @@ -52,7 +52,9 @@ early_param("no-steal-acc", parse_no_stealacc);
> > >   /* return stolen time in ns by asking the hypervisor */
> > >   static u64 para_steal_clock(int cpu)
> > >   {
> > > +	struct pvclock_vcpu_stolen_time *kaddr = NULL;
> > >   	struct pv_time_stolen_time_region *reg;
> > > +	u64 ret = 0;
> > >   	reg = per_cpu_ptr(&stolen_time_region, cpu);
> > > @@ -61,28 +63,38 @@ static u64 para_steal_clock(int cpu)
> > >   	 * online notification callback runs. Until the callback
> > >   	 * has run we just return zero.
> > >   	 */
> > > -	if (!reg->kaddr)
> > > +	rcu_read_lock();
> > > +	kaddr = rcu_dereference(reg->kaddr);
> > > +	if (!kaddr) {
> > > +		rcu_read_unlock();
> > >   		return 0;
> > > +	}
> > > -	return le64_to_cpu(READ_ONCE(reg->kaddr->stolen_time));
> > > +	ret = le64_to_cpu(READ_ONCE(kaddr->stolen_time));
> > 
> > Is this READ_ONCE() still required now?
> 
> Yes, as it might be called for another cpu than the current one.
> stolen_time might just be updated, so you want to avoid load tearing.

Ah yes, thanks. The lifetime of the structure is one thing, but the
stolen time field is updated much more regularly than the kaddr pointer.

So:

Acked-by: Will Deacon <will@kernel.org>

Cheers,

Will
_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH v2] arm64: paravirt: Use RCU read locks to guard stolen_time
@ 2022-05-04 13:50       ` Will Deacon
  0 siblings, 0 replies; 17+ messages in thread
From: Will Deacon @ 2022-05-04 13:50 UTC (permalink / raw)
  To: Juergen Gross
  Cc: Elliot Berman, Srivatsa S. Bhat (VMware),
	Alexey Makhalov, Catalin Marinas, Prakruthi Deepak Heragu,
	virtualization, x86, linux-arm-kernel, linux-kernel,
	Murali Nalajala, stable

On Wed, May 04, 2022 at 03:38:47PM +0200, Juergen Gross wrote:
> On 04.05.22 11:45, Will Deacon wrote:
> > On Thu, Apr 28, 2022 at 11:35:36AM -0700, Elliot Berman wrote:
> > > diff --git a/arch/arm64/kernel/paravirt.c b/arch/arm64/kernel/paravirt.c
> > > index 75fed4460407..e724ea3d86f0 100644
> > > --- a/arch/arm64/kernel/paravirt.c
> > > +++ b/arch/arm64/kernel/paravirt.c
> > > @@ -52,7 +52,9 @@ early_param("no-steal-acc", parse_no_stealacc);
> > >   /* return stolen time in ns by asking the hypervisor */
> > >   static u64 para_steal_clock(int cpu)
> > >   {
> > > +	struct pvclock_vcpu_stolen_time *kaddr = NULL;
> > >   	struct pv_time_stolen_time_region *reg;
> > > +	u64 ret = 0;
> > >   	reg = per_cpu_ptr(&stolen_time_region, cpu);
> > > @@ -61,28 +63,38 @@ static u64 para_steal_clock(int cpu)
> > >   	 * online notification callback runs. Until the callback
> > >   	 * has run we just return zero.
> > >   	 */
> > > -	if (!reg->kaddr)
> > > +	rcu_read_lock();
> > > +	kaddr = rcu_dereference(reg->kaddr);
> > > +	if (!kaddr) {
> > > +		rcu_read_unlock();
> > >   		return 0;
> > > +	}
> > > -	return le64_to_cpu(READ_ONCE(reg->kaddr->stolen_time));
> > > +	ret = le64_to_cpu(READ_ONCE(kaddr->stolen_time));
> > 
> > Is this READ_ONCE() still required now?
> 
> Yes, as it might be called for another cpu than the current one.
> stolen_time might just be updated, so you want to avoid load tearing.

Ah yes, thanks. The lifetime of the structure is one thing, but the
stolen time field is updated much more regularly than the kaddr pointer.

So:

Acked-by: Will Deacon <will@kernel.org>

Cheers,

Will

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH v2] arm64: paravirt: Use RCU read locks to guard stolen_time
  2022-04-28 18:35 ` Elliot Berman
  (?)
@ 2022-05-05 10:49   ` Will Deacon
  -1 siblings, 0 replies; 17+ messages in thread
From: Will Deacon @ 2022-05-05 10:49 UTC (permalink / raw)
  To: Elliot Berman
  Cc: Juergen Gross, Srivatsa S. Bhat (VMware),
	Alexey Makhalov, Catalin Marinas, Prakruthi Deepak Heragu,
	virtualization, x86, linux-arm-kernel, linux-kernel,
	Murali Nalajala, stable

Hi Elliot,

On Thu, Apr 28, 2022 at 11:35:36AM -0700, Elliot Berman wrote:
> From: Prakruthi Deepak Heragu <quic_pheragu@quicinc.com>
> 
> During hotplug, the stolen time data structure is unmapped and memset.
> There is a possibility of the timer IRQ being triggered before memset
> and stolen time is getting updated as part of this timer IRQ handler. This
> causes the below crash in timer handler -
> 
>   [ 3457.473139][    C5] Unable to handle kernel paging request at virtual address ffffffc03df05148
>   ...
>   [ 3458.154398][    C5] Call trace:
>   [ 3458.157648][    C5]  para_steal_clock+0x30/0x50
>   [ 3458.162319][    C5]  irqtime_account_process_tick+0x30/0x194
>   [ 3458.168148][    C5]  account_process_tick+0x3c/0x280
>   [ 3458.173274][    C5]  update_process_times+0x5c/0xf4
>   [ 3458.178311][    C5]  tick_sched_timer+0x180/0x384
>   [ 3458.183164][    C5]  __run_hrtimer+0x160/0x57c
>   [ 3458.187744][    C5]  hrtimer_interrupt+0x258/0x684
>   [ 3458.192698][    C5]  arch_timer_handler_virt+0x5c/0xa0
>   [ 3458.198002][    C5]  handle_percpu_devid_irq+0xdc/0x414
>   [ 3458.203385][    C5]  handle_domain_irq+0xa8/0x168
>   [ 3458.208241][    C5]  gic_handle_irq.34493+0x54/0x244
>   [ 3458.213359][    C5]  call_on_irq_stack+0x40/0x70
>   [ 3458.218125][    C5]  do_interrupt_handler+0x60/0x9c
>   [ 3458.223156][    C5]  el1_interrupt+0x34/0x64
>   [ 3458.227560][    C5]  el1h_64_irq_handler+0x1c/0x2c
>   [ 3458.232503][    C5]  el1h_64_irq+0x7c/0x80
>   [ 3458.236736][    C5]  free_vmap_area_noflush+0x108/0x39c
>   [ 3458.242126][    C5]  remove_vm_area+0xbc/0x118
>   [ 3458.246714][    C5]  vm_remove_mappings+0x48/0x2a4
>   [ 3458.251656][    C5]  __vunmap+0x154/0x278
>   [ 3458.255796][    C5]  stolen_time_cpu_down_prepare+0xc0/0xd8
>   [ 3458.261542][    C5]  cpuhp_invoke_callback+0x248/0xc34
>   [ 3458.266842][    C5]  cpuhp_thread_fun+0x1c4/0x248
>   [ 3458.271696][    C5]  smpboot_thread_fn+0x1b0/0x400
>   [ 3458.276638][    C5]  kthread+0x17c/0x1e0
>   [ 3458.280691][    C5]  ret_from_fork+0x10/0x20
> 
> As a fix, introduce rcu lock to update stolen time structure.
> 
> Fixes: 75df529bec91 ("arm64: paravirt: Initialize steal time when cpu is online")
> Cc: stable@vger.kernel.org
> Signed-off-by: Prakruthi Deepak Heragu <quic_pheragu@quicinc.com>
> Signed-off-by: Elliot Berman <quic_eberman@quicinc.com>
> ---
> Changes since v1: https://lore.kernel.org/all/20220420204417.155194-1-quic_eberman@quicinc.com/
>  - Use RCU instead of disabling interrupts
> 
>  arch/arm64/kernel/paravirt.c | 24 +++++++++++++++++++-----
>  1 file changed, 19 insertions(+), 5 deletions(-)

I applied this locally, but sparse is complaining because the 'kaddr' field
of 'struct pv_time_stolen_time_region' is missing an '__rcu' annotation:

 | arch/arm64/kernel/paravirt.c:112:9: warning: cast adds address space '__rcu' to expression [sparse]
 | arch/arm64/kernel/paravirt.c:112:9: error: incompatible types in comparison expression (different address spaces): [sparse]
 | arch/arm64/kernel/paravirt.c:112:9:    struct pvclock_vcpu_stolen_time [noderef] __rcu * [sparse]
 | arch/arm64/kernel/paravirt.c:112:9:    struct pvclock_vcpu_stolen_time * [sparse]
 | arch/arm64/kernel/paravirt.c:67:17: warning: cast adds address space '__rcu' to expression [sparse]
 | arch/arm64/kernel/paravirt.c:67:17: error: incompatible types in comparison expression (different address spaces): [sparse]
 | arch/arm64/kernel/paravirt.c:67:17:    struct pvclock_vcpu_stolen_time [noderef] __rcu * [sparse]
 | arch/arm64/kernel/paravirt.c:67:17:    struct pvclock_vcpu_stolen_time * [sparse]
 | arch/arm64/kernel/paravirt.c:88:9: warning: cast adds address space '__rcu' to expression [sparse]
 | arch/arm64/kernel/paravirt.c:88:9: error: incompatible types in comparison expression (different address spaces): [sparse]
 | arch/arm64/kernel/paravirt.c:88:9:    struct pvclock_vcpu_stolen_time [noderef] __rcu * [sparse]
 | arch/arm64/kernel/paravirt.c:88:9:    struct pvclock_vcpu_stolen_time * [sparse]

The diff below seems to make it happy again, but please can you take a
look?

Cheers,

Will

--->8

diff --git a/arch/arm64/kernel/paravirt.c b/arch/arm64/kernel/paravirt.c
index e724ea3d86f0..57c7c211f8c7 100644
--- a/arch/arm64/kernel/paravirt.c
+++ b/arch/arm64/kernel/paravirt.c
@@ -35,7 +35,7 @@ static u64 native_steal_clock(int cpu)
 DEFINE_STATIC_CALL(pv_steal_clock, native_steal_clock);
 
 struct pv_time_stolen_time_region {
-       struct pvclock_vcpu_stolen_time *kaddr;
+       struct pvclock_vcpu_stolen_time __rcu *kaddr;
 };
 
 static DEFINE_PER_CPU(struct pv_time_stolen_time_region, stolen_time_region);
@@ -84,8 +84,7 @@ static int stolen_time_cpu_down_prepare(unsigned int cpu)
        if (!reg->kaddr)
                return 0;
 
-       kaddr = reg->kaddr;
-       rcu_assign_pointer(reg->kaddr, NULL);
+       kaddr = rcu_replace_pointer(reg->kaddr, NULL, true);
        synchronize_rcu();
        memunmap(kaddr);
 
@@ -116,8 +115,8 @@ static int stolen_time_cpu_online(unsigned int cpu)
                return -ENOMEM;
        }
 
-       if (le32_to_cpu(reg->kaddr->revision) != 0 ||
-           le32_to_cpu(reg->kaddr->attributes) != 0) {
+       if (le32_to_cpu(kaddr->revision) != 0 ||
+           le32_to_cpu(kaddr->attributes) != 0) {
                pr_warn_once("Unexpected revision or attributes in stolen time data\n");
                return -ENXIO;
        }


^ permalink raw reply related	[flat|nested] 17+ messages in thread

* Re: [PATCH v2] arm64: paravirt: Use RCU read locks to guard stolen_time
@ 2022-05-05 10:49   ` Will Deacon
  0 siblings, 0 replies; 17+ messages in thread
From: Will Deacon @ 2022-05-05 10:49 UTC (permalink / raw)
  To: Elliot Berman
  Cc: Juergen Gross, Murali Nalajala, Catalin Marinas, x86,
	linux-kernel, stable, virtualization, Alexey Makhalov,
	Prakruthi Deepak Heragu, linux-arm-kernel

Hi Elliot,

On Thu, Apr 28, 2022 at 11:35:36AM -0700, Elliot Berman wrote:
> From: Prakruthi Deepak Heragu <quic_pheragu@quicinc.com>
> 
> During hotplug, the stolen time data structure is unmapped and memset.
> There is a possibility of the timer IRQ being triggered before memset
> and stolen time is getting updated as part of this timer IRQ handler. This
> causes the below crash in timer handler -
> 
>   [ 3457.473139][    C5] Unable to handle kernel paging request at virtual address ffffffc03df05148
>   ...
>   [ 3458.154398][    C5] Call trace:
>   [ 3458.157648][    C5]  para_steal_clock+0x30/0x50
>   [ 3458.162319][    C5]  irqtime_account_process_tick+0x30/0x194
>   [ 3458.168148][    C5]  account_process_tick+0x3c/0x280
>   [ 3458.173274][    C5]  update_process_times+0x5c/0xf4
>   [ 3458.178311][    C5]  tick_sched_timer+0x180/0x384
>   [ 3458.183164][    C5]  __run_hrtimer+0x160/0x57c
>   [ 3458.187744][    C5]  hrtimer_interrupt+0x258/0x684
>   [ 3458.192698][    C5]  arch_timer_handler_virt+0x5c/0xa0
>   [ 3458.198002][    C5]  handle_percpu_devid_irq+0xdc/0x414
>   [ 3458.203385][    C5]  handle_domain_irq+0xa8/0x168
>   [ 3458.208241][    C5]  gic_handle_irq.34493+0x54/0x244
>   [ 3458.213359][    C5]  call_on_irq_stack+0x40/0x70
>   [ 3458.218125][    C5]  do_interrupt_handler+0x60/0x9c
>   [ 3458.223156][    C5]  el1_interrupt+0x34/0x64
>   [ 3458.227560][    C5]  el1h_64_irq_handler+0x1c/0x2c
>   [ 3458.232503][    C5]  el1h_64_irq+0x7c/0x80
>   [ 3458.236736][    C5]  free_vmap_area_noflush+0x108/0x39c
>   [ 3458.242126][    C5]  remove_vm_area+0xbc/0x118
>   [ 3458.246714][    C5]  vm_remove_mappings+0x48/0x2a4
>   [ 3458.251656][    C5]  __vunmap+0x154/0x278
>   [ 3458.255796][    C5]  stolen_time_cpu_down_prepare+0xc0/0xd8
>   [ 3458.261542][    C5]  cpuhp_invoke_callback+0x248/0xc34
>   [ 3458.266842][    C5]  cpuhp_thread_fun+0x1c4/0x248
>   [ 3458.271696][    C5]  smpboot_thread_fn+0x1b0/0x400
>   [ 3458.276638][    C5]  kthread+0x17c/0x1e0
>   [ 3458.280691][    C5]  ret_from_fork+0x10/0x20
> 
> As a fix, introduce rcu lock to update stolen time structure.
> 
> Fixes: 75df529bec91 ("arm64: paravirt: Initialize steal time when cpu is online")
> Cc: stable@vger.kernel.org
> Signed-off-by: Prakruthi Deepak Heragu <quic_pheragu@quicinc.com>
> Signed-off-by: Elliot Berman <quic_eberman@quicinc.com>
> ---
> Changes since v1: https://lore.kernel.org/all/20220420204417.155194-1-quic_eberman@quicinc.com/
>  - Use RCU instead of disabling interrupts
> 
>  arch/arm64/kernel/paravirt.c | 24 +++++++++++++++++++-----
>  1 file changed, 19 insertions(+), 5 deletions(-)

I applied this locally, but sparse is complaining because the 'kaddr' field
of 'struct pv_time_stolen_time_region' is missing an '__rcu' annotation:

 | arch/arm64/kernel/paravirt.c:112:9: warning: cast adds address space '__rcu' to expression [sparse]
 | arch/arm64/kernel/paravirt.c:112:9: error: incompatible types in comparison expression (different address spaces): [sparse]
 | arch/arm64/kernel/paravirt.c:112:9:    struct pvclock_vcpu_stolen_time [noderef] __rcu * [sparse]
 | arch/arm64/kernel/paravirt.c:112:9:    struct pvclock_vcpu_stolen_time * [sparse]
 | arch/arm64/kernel/paravirt.c:67:17: warning: cast adds address space '__rcu' to expression [sparse]
 | arch/arm64/kernel/paravirt.c:67:17: error: incompatible types in comparison expression (different address spaces): [sparse]
 | arch/arm64/kernel/paravirt.c:67:17:    struct pvclock_vcpu_stolen_time [noderef] __rcu * [sparse]
 | arch/arm64/kernel/paravirt.c:67:17:    struct pvclock_vcpu_stolen_time * [sparse]
 | arch/arm64/kernel/paravirt.c:88:9: warning: cast adds address space '__rcu' to expression [sparse]
 | arch/arm64/kernel/paravirt.c:88:9: error: incompatible types in comparison expression (different address spaces): [sparse]
 | arch/arm64/kernel/paravirt.c:88:9:    struct pvclock_vcpu_stolen_time [noderef] __rcu * [sparse]
 | arch/arm64/kernel/paravirt.c:88:9:    struct pvclock_vcpu_stolen_time * [sparse]

The diff below seems to make it happy again, but please can you take a
look?

Cheers,

Will

--->8

diff --git a/arch/arm64/kernel/paravirt.c b/arch/arm64/kernel/paravirt.c
index e724ea3d86f0..57c7c211f8c7 100644
--- a/arch/arm64/kernel/paravirt.c
+++ b/arch/arm64/kernel/paravirt.c
@@ -35,7 +35,7 @@ static u64 native_steal_clock(int cpu)
 DEFINE_STATIC_CALL(pv_steal_clock, native_steal_clock);
 
 struct pv_time_stolen_time_region {
-       struct pvclock_vcpu_stolen_time *kaddr;
+       struct pvclock_vcpu_stolen_time __rcu *kaddr;
 };
 
 static DEFINE_PER_CPU(struct pv_time_stolen_time_region, stolen_time_region);
@@ -84,8 +84,7 @@ static int stolen_time_cpu_down_prepare(unsigned int cpu)
        if (!reg->kaddr)
                return 0;
 
-       kaddr = reg->kaddr;
-       rcu_assign_pointer(reg->kaddr, NULL);
+       kaddr = rcu_replace_pointer(reg->kaddr, NULL, true);
        synchronize_rcu();
        memunmap(kaddr);
 
@@ -116,8 +115,8 @@ static int stolen_time_cpu_online(unsigned int cpu)
                return -ENOMEM;
        }
 
-       if (le32_to_cpu(reg->kaddr->revision) != 0 ||
-           le32_to_cpu(reg->kaddr->attributes) != 0) {
+       if (le32_to_cpu(kaddr->revision) != 0 ||
+           le32_to_cpu(kaddr->attributes) != 0) {
                pr_warn_once("Unexpected revision or attributes in stolen time data\n");
                return -ENXIO;
        }

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* Re: [PATCH v2] arm64: paravirt: Use RCU read locks to guard stolen_time
@ 2022-05-05 10:49   ` Will Deacon
  0 siblings, 0 replies; 17+ messages in thread
From: Will Deacon @ 2022-05-05 10:49 UTC (permalink / raw)
  To: Elliot Berman
  Cc: Juergen Gross, Srivatsa S. Bhat (VMware),
	Alexey Makhalov, Catalin Marinas, Prakruthi Deepak Heragu,
	virtualization, x86, linux-arm-kernel, linux-kernel,
	Murali Nalajala, stable

Hi Elliot,

On Thu, Apr 28, 2022 at 11:35:36AM -0700, Elliot Berman wrote:
> From: Prakruthi Deepak Heragu <quic_pheragu@quicinc.com>
> 
> During hotplug, the stolen time data structure is unmapped and memset.
> There is a possibility of the timer IRQ being triggered before memset
> and stolen time is getting updated as part of this timer IRQ handler. This
> causes the below crash in timer handler -
> 
>   [ 3457.473139][    C5] Unable to handle kernel paging request at virtual address ffffffc03df05148
>   ...
>   [ 3458.154398][    C5] Call trace:
>   [ 3458.157648][    C5]  para_steal_clock+0x30/0x50
>   [ 3458.162319][    C5]  irqtime_account_process_tick+0x30/0x194
>   [ 3458.168148][    C5]  account_process_tick+0x3c/0x280
>   [ 3458.173274][    C5]  update_process_times+0x5c/0xf4
>   [ 3458.178311][    C5]  tick_sched_timer+0x180/0x384
>   [ 3458.183164][    C5]  __run_hrtimer+0x160/0x57c
>   [ 3458.187744][    C5]  hrtimer_interrupt+0x258/0x684
>   [ 3458.192698][    C5]  arch_timer_handler_virt+0x5c/0xa0
>   [ 3458.198002][    C5]  handle_percpu_devid_irq+0xdc/0x414
>   [ 3458.203385][    C5]  handle_domain_irq+0xa8/0x168
>   [ 3458.208241][    C5]  gic_handle_irq.34493+0x54/0x244
>   [ 3458.213359][    C5]  call_on_irq_stack+0x40/0x70
>   [ 3458.218125][    C5]  do_interrupt_handler+0x60/0x9c
>   [ 3458.223156][    C5]  el1_interrupt+0x34/0x64
>   [ 3458.227560][    C5]  el1h_64_irq_handler+0x1c/0x2c
>   [ 3458.232503][    C5]  el1h_64_irq+0x7c/0x80
>   [ 3458.236736][    C5]  free_vmap_area_noflush+0x108/0x39c
>   [ 3458.242126][    C5]  remove_vm_area+0xbc/0x118
>   [ 3458.246714][    C5]  vm_remove_mappings+0x48/0x2a4
>   [ 3458.251656][    C5]  __vunmap+0x154/0x278
>   [ 3458.255796][    C5]  stolen_time_cpu_down_prepare+0xc0/0xd8
>   [ 3458.261542][    C5]  cpuhp_invoke_callback+0x248/0xc34
>   [ 3458.266842][    C5]  cpuhp_thread_fun+0x1c4/0x248
>   [ 3458.271696][    C5]  smpboot_thread_fn+0x1b0/0x400
>   [ 3458.276638][    C5]  kthread+0x17c/0x1e0
>   [ 3458.280691][    C5]  ret_from_fork+0x10/0x20
> 
> As a fix, introduce rcu lock to update stolen time structure.
> 
> Fixes: 75df529bec91 ("arm64: paravirt: Initialize steal time when cpu is online")
> Cc: stable@vger.kernel.org
> Signed-off-by: Prakruthi Deepak Heragu <quic_pheragu@quicinc.com>
> Signed-off-by: Elliot Berman <quic_eberman@quicinc.com>
> ---
> Changes since v1: https://lore.kernel.org/all/20220420204417.155194-1-quic_eberman@quicinc.com/
>  - Use RCU instead of disabling interrupts
> 
>  arch/arm64/kernel/paravirt.c | 24 +++++++++++++++++++-----
>  1 file changed, 19 insertions(+), 5 deletions(-)

I applied this locally, but sparse is complaining because the 'kaddr' field
of 'struct pv_time_stolen_time_region' is missing an '__rcu' annotation:

 | arch/arm64/kernel/paravirt.c:112:9: warning: cast adds address space '__rcu' to expression [sparse]
 | arch/arm64/kernel/paravirt.c:112:9: error: incompatible types in comparison expression (different address spaces): [sparse]
 | arch/arm64/kernel/paravirt.c:112:9:    struct pvclock_vcpu_stolen_time [noderef] __rcu * [sparse]
 | arch/arm64/kernel/paravirt.c:112:9:    struct pvclock_vcpu_stolen_time * [sparse]
 | arch/arm64/kernel/paravirt.c:67:17: warning: cast adds address space '__rcu' to expression [sparse]
 | arch/arm64/kernel/paravirt.c:67:17: error: incompatible types in comparison expression (different address spaces): [sparse]
 | arch/arm64/kernel/paravirt.c:67:17:    struct pvclock_vcpu_stolen_time [noderef] __rcu * [sparse]
 | arch/arm64/kernel/paravirt.c:67:17:    struct pvclock_vcpu_stolen_time * [sparse]
 | arch/arm64/kernel/paravirt.c:88:9: warning: cast adds address space '__rcu' to expression [sparse]
 | arch/arm64/kernel/paravirt.c:88:9: error: incompatible types in comparison expression (different address spaces): [sparse]
 | arch/arm64/kernel/paravirt.c:88:9:    struct pvclock_vcpu_stolen_time [noderef] __rcu * [sparse]
 | arch/arm64/kernel/paravirt.c:88:9:    struct pvclock_vcpu_stolen_time * [sparse]

The diff below seems to make it happy again, but please can you take a
look?

Cheers,

Will

--->8

diff --git a/arch/arm64/kernel/paravirt.c b/arch/arm64/kernel/paravirt.c
index e724ea3d86f0..57c7c211f8c7 100644
--- a/arch/arm64/kernel/paravirt.c
+++ b/arch/arm64/kernel/paravirt.c
@@ -35,7 +35,7 @@ static u64 native_steal_clock(int cpu)
 DEFINE_STATIC_CALL(pv_steal_clock, native_steal_clock);
 
 struct pv_time_stolen_time_region {
-       struct pvclock_vcpu_stolen_time *kaddr;
+       struct pvclock_vcpu_stolen_time __rcu *kaddr;
 };
 
 static DEFINE_PER_CPU(struct pv_time_stolen_time_region, stolen_time_region);
@@ -84,8 +84,7 @@ static int stolen_time_cpu_down_prepare(unsigned int cpu)
        if (!reg->kaddr)
                return 0;
 
-       kaddr = reg->kaddr;
-       rcu_assign_pointer(reg->kaddr, NULL);
+       kaddr = rcu_replace_pointer(reg->kaddr, NULL, true);
        synchronize_rcu();
        memunmap(kaddr);
 
@@ -116,8 +115,8 @@ static int stolen_time_cpu_online(unsigned int cpu)
                return -ENOMEM;
        }
 
-       if (le32_to_cpu(reg->kaddr->revision) != 0 ||
-           le32_to_cpu(reg->kaddr->attributes) != 0) {
+       if (le32_to_cpu(kaddr->revision) != 0 ||
+           le32_to_cpu(kaddr->attributes) != 0) {
                pr_warn_once("Unexpected revision or attributes in stolen time data\n");
                return -ENXIO;
        }


_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply related	[flat|nested] 17+ messages in thread

end of thread, other threads:[~2022-05-05 10:50 UTC | newest]

Thread overview: 17+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-04-28 18:35 [PATCH v2] arm64: paravirt: Use RCU read locks to guard stolen_time Elliot Berman
2022-04-28 18:35 ` Elliot Berman
2022-05-04  9:45 ` Will Deacon
2022-05-04  9:45   ` Will Deacon
2022-05-04  9:45   ` Will Deacon
2022-05-04 13:38   ` Juergen Gross
2022-05-04 13:38     ` Juergen Gross
2022-05-04 13:38     ` Juergen Gross via Virtualization
2022-05-04 13:50     ` Will Deacon
2022-05-04 13:50       ` Will Deacon
2022-05-04 13:50       ` Will Deacon
2022-05-04 13:40 ` Juergen Gross
2022-05-04 13:40   ` Juergen Gross
2022-05-04 13:40   ` Juergen Gross via Virtualization
2022-05-05 10:49 ` Will Deacon
2022-05-05 10:49   ` Will Deacon
2022-05-05 10:49   ` Will Deacon

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.