* [PATCH v2 01/11] KVM: arm64: Store vcpu on the stack during __guest_enter()
2017-08-08 16:46 ` James Morse
@ 2017-08-08 16:46 ` James Morse
-1 siblings, 0 replies; 38+ messages in thread
From: James Morse @ 2017-08-08 16:46 UTC (permalink / raw)
To: linux-arm-kernel-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r
Cc: kvmarm-FPEHb7Xf0XXUo1n7N8X6UoWGPAHP3yOg,
devicetree-u79uwXL29TY76Z2rM5mHXA, Will Deacon, Catalin Marinas,
Marc Zyngier, Christoffer Dall, Mark Rutland, Rob Herring,
Loc Ho, James Morse
KVM uses tpidr_el2 as its private vcpu register, which makes sense for
non-vhe world switch as only KVM can access this register. This means
vhe Linux has to use tpidr_el1, which KVM has to save/restore as part
of the host context.
__guest_enter() stores the host_ctxt on the stack, do the same with
the vcpu.
Signed-off-by: James Morse <james.morse-5wv7dgnIgG8@public.gmane.org>
Reviewed-by: Christoffer Dall <cdall-QSEj5FYQhm4dnm+yROfE0A@public.gmane.org>
---
arch/arm64/kvm/hyp/entry.S | 10 +++++++---
arch/arm64/kvm/hyp/hyp-entry.S | 6 +++---
2 files changed, 10 insertions(+), 6 deletions(-)
diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S
index 12ee62d6d410..9a8ab5dddd9e 100644
--- a/arch/arm64/kvm/hyp/entry.S
+++ b/arch/arm64/kvm/hyp/entry.S
@@ -62,8 +62,8 @@ ENTRY(__guest_enter)
// Store the host regs
save_callee_saved_regs x1
- // Store the host_ctxt for use at exit time
- str x1, [sp, #-16]!
+ // Store host_ctxt and vcpu for use at exit time
+ stp x1, x0, [sp, #-16]!
add x18, x0, #VCPU_CONTEXT
@@ -159,6 +159,10 @@ abort_guest_exit_end:
ENDPROC(__guest_exit)
ENTRY(__fpsimd_guest_restore)
+ // x0: esr
+ // x1: vcpu
+ // x2-x29,lr: vcpu regs
+ // vcpu x0-x1 on the stack
stp x2, x3, [sp, #-16]!
stp x4, lr, [sp, #-16]!
@@ -173,7 +177,7 @@ alternative_else
alternative_endif
isb
- mrs x3, tpidr_el2
+ mov x3, x1
ldr x0, [x3, #VCPU_HOST_CONTEXT]
kern_hyp_va x0
diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S
index 5170ce1021da..fce7cc507e0a 100644
--- a/arch/arm64/kvm/hyp/hyp-entry.S
+++ b/arch/arm64/kvm/hyp/hyp-entry.S
@@ -104,6 +104,7 @@ el1_trap:
/*
* x0: ESR_EC
*/
+ ldr x1, [sp, #16 + 8] // vcpu stored by __guest_enter
/*
* We trap the first access to the FP/SIMD to save the host context
@@ -116,19 +117,18 @@ alternative_if_not ARM64_HAS_NO_FPSIMD
b.eq __fpsimd_guest_restore
alternative_else_nop_endif
- mrs x1, tpidr_el2
mov x0, #ARM_EXCEPTION_TRAP
b __guest_exit
el1_irq:
stp x0, x1, [sp, #-16]!
- mrs x1, tpidr_el2
+ ldr x1, [sp, #16 + 8]
mov x0, #ARM_EXCEPTION_IRQ
b __guest_exit
el1_error:
stp x0, x1, [sp, #-16]!
- mrs x1, tpidr_el2
+ ldr x1, [sp, #16 + 8]
mov x0, #ARM_EXCEPTION_EL1_SERROR
b __guest_exit
--
2.13.3
--
To unsubscribe from this list: send the line "unsubscribe devicetree" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply related [flat|nested] 38+ messages in thread
* [PATCH v2 01/11] KVM: arm64: Store vcpu on the stack during __guest_enter()
@ 2017-08-08 16:46 ` James Morse
0 siblings, 0 replies; 38+ messages in thread
From: James Morse @ 2017-08-08 16:46 UTC (permalink / raw)
To: linux-arm-kernel
KVM uses tpidr_el2 as its private vcpu register, which makes sense for
non-vhe world switch as only KVM can access this register. This means
vhe Linux has to use tpidr_el1, which KVM has to save/restore as part
of the host context.
__guest_enter() stores the host_ctxt on the stack, do the same with
the vcpu.
Signed-off-by: James Morse <james.morse@arm.com>
Reviewed-by: Christoffer Dall <cdall@linaro.org>
---
arch/arm64/kvm/hyp/entry.S | 10 +++++++---
arch/arm64/kvm/hyp/hyp-entry.S | 6 +++---
2 files changed, 10 insertions(+), 6 deletions(-)
diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S
index 12ee62d6d410..9a8ab5dddd9e 100644
--- a/arch/arm64/kvm/hyp/entry.S
+++ b/arch/arm64/kvm/hyp/entry.S
@@ -62,8 +62,8 @@ ENTRY(__guest_enter)
// Store the host regs
save_callee_saved_regs x1
- // Store the host_ctxt for use at exit time
- str x1, [sp, #-16]!
+ // Store host_ctxt and vcpu for use at exit time
+ stp x1, x0, [sp, #-16]!
add x18, x0, #VCPU_CONTEXT
@@ -159,6 +159,10 @@ abort_guest_exit_end:
ENDPROC(__guest_exit)
ENTRY(__fpsimd_guest_restore)
+ // x0: esr
+ // x1: vcpu
+ // x2-x29,lr: vcpu regs
+ // vcpu x0-x1 on the stack
stp x2, x3, [sp, #-16]!
stp x4, lr, [sp, #-16]!
@@ -173,7 +177,7 @@ alternative_else
alternative_endif
isb
- mrs x3, tpidr_el2
+ mov x3, x1
ldr x0, [x3, #VCPU_HOST_CONTEXT]
kern_hyp_va x0
diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S
index 5170ce1021da..fce7cc507e0a 100644
--- a/arch/arm64/kvm/hyp/hyp-entry.S
+++ b/arch/arm64/kvm/hyp/hyp-entry.S
@@ -104,6 +104,7 @@ el1_trap:
/*
* x0: ESR_EC
*/
+ ldr x1, [sp, #16 + 8] // vcpu stored by __guest_enter
/*
* We trap the first access to the FP/SIMD to save the host context
@@ -116,19 +117,18 @@ alternative_if_not ARM64_HAS_NO_FPSIMD
b.eq __fpsimd_guest_restore
alternative_else_nop_endif
- mrs x1, tpidr_el2
mov x0, #ARM_EXCEPTION_TRAP
b __guest_exit
el1_irq:
stp x0, x1, [sp, #-16]!
- mrs x1, tpidr_el2
+ ldr x1, [sp, #16 + 8]
mov x0, #ARM_EXCEPTION_IRQ
b __guest_exit
el1_error:
stp x0, x1, [sp, #-16]!
- mrs x1, tpidr_el2
+ ldr x1, [sp, #16 + 8]
mov x0, #ARM_EXCEPTION_EL1_SERROR
b __guest_exit
--
2.13.3
^ permalink raw reply related [flat|nested] 38+ messages in thread
* [PATCH v2 03/11] KVM: arm64: Change hyp_panic()'s dependency on tpidr_el2
2017-08-08 16:46 ` James Morse
@ 2017-08-08 16:46 ` James Morse
-1 siblings, 0 replies; 38+ messages in thread
From: James Morse @ 2017-08-08 16:46 UTC (permalink / raw)
To: linux-arm-kernel-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r
Cc: kvmarm-FPEHb7Xf0XXUo1n7N8X6UoWGPAHP3yOg,
devicetree-u79uwXL29TY76Z2rM5mHXA, Will Deacon, Catalin Marinas,
Marc Zyngier, Christoffer Dall, Mark Rutland, Rob Herring,
Loc Ho, James Morse
Make tpidr_el2 a cpu-offset for per-cpu variables in the same way the
host uses tpidr_el1. This lets tpidr_el{1,2} have the same value, and
on VHE they can be the same register.
KVM calls hyp_panic() when anything unexpected happens. This may occur
while a guest owns the EL1 registers. KVM stashes the vcpu pointer in
tpidr_el2, which it uses to find the host context in order to restore
the host EL1 registers before parachuting into the host's panic().
The host context is a struct kvm_cpu_context allocated in the per-cpu
area, and mapped to hyp. Given the per-cpu offset for this CPU, this is
easy to find. Change hyp_panic() to take a pointer to the
struct kvm_cpu_context. Wrap these calls with an asm function that
retrieves the struct kvm_cpu_context from the host's per-cpu area.
Copy the per-cpu offset from the host's tpidr_el1 into tpidr_el2 during
kvm init. (Later patches will make this unnecessary for VHE hosts)
We print out the vcpu pointer as part of the panic message. Add a back
reference to the 'running vcpu' in the host cpu context to preserve this.
Signed-off-by: James Morse <james.morse-5wv7dgnIgG8@public.gmane.org>
---
Changes since v1:
* Added a comment explaining how =kvm_host_cpu_state gets from a host-va
to a hyp va.
* Added the first paragraph to the commit message.
arch/arm64/include/asm/kvm_host.h | 2 ++
arch/arm64/kvm/hyp/hyp-entry.S | 12 ++++++++++++
arch/arm64/kvm/hyp/s2-setup.c | 3 +++
arch/arm64/kvm/hyp/switch.c | 25 +++++++++++++------------
4 files changed, 30 insertions(+), 12 deletions(-)
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index d68630007b14..7733492d9a35 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -191,6 +191,8 @@ struct kvm_cpu_context {
u64 sys_regs[NR_SYS_REGS];
u32 copro[NR_COPRO_REGS];
};
+
+ struct kvm_vcpu *__hyp_running_vcpu;
};
typedef struct kvm_cpu_context kvm_cpu_context_t;
diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S
index fce7cc507e0a..e4f37b9dd47c 100644
--- a/arch/arm64/kvm/hyp/hyp-entry.S
+++ b/arch/arm64/kvm/hyp/hyp-entry.S
@@ -163,6 +163,18 @@ ENTRY(__hyp_do_panic)
eret
ENDPROC(__hyp_do_panic)
+ENTRY(__hyp_panic)
+ /*
+ * '=kvm_host_cpu_state' is a host VA from the constant pool, it may
+ * not be accessible by this address from EL2, hyp_panic() converts
+ * it with kern_hyp_va() before use.
+ */
+ ldr x0, =kvm_host_cpu_state
+ mrs x1, tpidr_el2
+ add x0, x0, x1
+ b hyp_panic
+ENDPROC(__hyp_panic)
+
.macro invalid_vector label, target = __hyp_panic
.align 2
\label:
diff --git a/arch/arm64/kvm/hyp/s2-setup.c b/arch/arm64/kvm/hyp/s2-setup.c
index b81f4091c909..eb401dbb285e 100644
--- a/arch/arm64/kvm/hyp/s2-setup.c
+++ b/arch/arm64/kvm/hyp/s2-setup.c
@@ -84,5 +84,8 @@ u32 __hyp_text __init_stage2_translation(void)
write_sysreg(val, vtcr_el2);
+ /* copy tpidr_el1 into tpidr_el2 for use by HYP */
+ write_sysreg(read_sysreg(tpidr_el1), tpidr_el2);
+
return parange;
}
diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
index 945e79c641c4..235d615cee30 100644
--- a/arch/arm64/kvm/hyp/switch.c
+++ b/arch/arm64/kvm/hyp/switch.c
@@ -286,9 +286,9 @@ int __hyp_text __kvm_vcpu_run(struct kvm_vcpu *vcpu)
u64 exit_code;
vcpu = kern_hyp_va(vcpu);
- write_sysreg(vcpu, tpidr_el2);
host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context);
+ host_ctxt->__hyp_running_vcpu = vcpu;
guest_ctxt = &vcpu->arch.ctxt;
__sysreg_save_host_state(host_ctxt);
@@ -393,7 +393,8 @@ int __hyp_text __kvm_vcpu_run(struct kvm_vcpu *vcpu)
static const char __hyp_panic_string[] = "HYP panic:\nPS:%08llx PC:%016llx ESR:%08llx\nFAR:%016llx HPFAR:%016llx PAR:%016llx\nVCPU:%p\n";
-static void __hyp_text __hyp_call_panic_nvhe(u64 spsr, u64 elr, u64 par)
+static void __hyp_text __hyp_call_panic_nvhe(u64 spsr, u64 elr, u64 par,
+ struct kvm_vcpu *vcpu)
{
unsigned long str_va;
@@ -407,35 +408,35 @@ static void __hyp_text __hyp_call_panic_nvhe(u64 spsr, u64 elr, u64 par)
__hyp_do_panic(str_va,
spsr, elr,
read_sysreg(esr_el2), read_sysreg_el2(far),
- read_sysreg(hpfar_el2), par,
- (void *)read_sysreg(tpidr_el2));
+ read_sysreg(hpfar_el2), par, vcpu);
}
-static void __hyp_text __hyp_call_panic_vhe(u64 spsr, u64 elr, u64 par)
+static void __hyp_text __hyp_call_panic_vhe(u64 spsr, u64 elr, u64 par,
+ struct kvm_vcpu *vcpu)
{
panic(__hyp_panic_string,
spsr, elr,
read_sysreg_el2(esr), read_sysreg_el2(far),
- read_sysreg(hpfar_el2), par,
- (void *)read_sysreg(tpidr_el2));
+ read_sysreg(hpfar_el2), par, vcpu);
}
static hyp_alternate_select(__hyp_call_panic,
__hyp_call_panic_nvhe, __hyp_call_panic_vhe,
ARM64_HAS_VIRT_HOST_EXTN);
-void __hyp_text __noreturn __hyp_panic(void)
+void __hyp_text __noreturn hyp_panic(struct kvm_cpu_context *__host_ctxt)
{
+ struct kvm_vcpu *vcpu = NULL;
+
u64 spsr = read_sysreg_el2(spsr);
u64 elr = read_sysreg_el2(elr);
u64 par = read_sysreg(par_el1);
if (read_sysreg(vttbr_el2)) {
- struct kvm_vcpu *vcpu;
struct kvm_cpu_context *host_ctxt;
- vcpu = (struct kvm_vcpu *)read_sysreg(tpidr_el2);
- host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context);
+ host_ctxt = kern_hyp_va(__host_ctxt);
+ vcpu = host_ctxt->__hyp_running_vcpu;
__timer_save_state(vcpu);
__deactivate_traps(vcpu);
__deactivate_vm(vcpu);
@@ -443,7 +444,7 @@ void __hyp_text __noreturn __hyp_panic(void)
}
/* Call panic for real */
- __hyp_call_panic()(spsr, elr, par);
+ __hyp_call_panic()(spsr, elr, par, vcpu);
unreachable();
}
--
2.13.3
--
To unsubscribe from this list: send the line "unsubscribe devicetree" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply related [flat|nested] 38+ messages in thread
* [PATCH v2 03/11] KVM: arm64: Change hyp_panic()'s dependency on tpidr_el2
@ 2017-08-08 16:46 ` James Morse
0 siblings, 0 replies; 38+ messages in thread
From: James Morse @ 2017-08-08 16:46 UTC (permalink / raw)
To: linux-arm-kernel
Make tpidr_el2 a cpu-offset for per-cpu variables in the same way the
host uses tpidr_el1. This lets tpidr_el{1,2} have the same value, and
on VHE they can be the same register.
KVM calls hyp_panic() when anything unexpected happens. This may occur
while a guest owns the EL1 registers. KVM stashes the vcpu pointer in
tpidr_el2, which it uses to find the host context in order to restore
the host EL1 registers before parachuting into the host's panic().
The host context is a struct kvm_cpu_context allocated in the per-cpu
area, and mapped to hyp. Given the per-cpu offset for this CPU, this is
easy to find. Change hyp_panic() to take a pointer to the
struct kvm_cpu_context. Wrap these calls with an asm function that
retrieves the struct kvm_cpu_context from the host's per-cpu area.
Copy the per-cpu offset from the host's tpidr_el1 into tpidr_el2 during
kvm init. (Later patches will make this unnecessary for VHE hosts)
We print out the vcpu pointer as part of the panic message. Add a back
reference to the 'running vcpu' in the host cpu context to preserve this.
Signed-off-by: James Morse <james.morse@arm.com>
---
Changes since v1:
* Added a comment explaining how =kvm_host_cpu_state gets from a host-va
to a hyp va.
* Added the first paragraph to the commit message.
arch/arm64/include/asm/kvm_host.h | 2 ++
arch/arm64/kvm/hyp/hyp-entry.S | 12 ++++++++++++
arch/arm64/kvm/hyp/s2-setup.c | 3 +++
arch/arm64/kvm/hyp/switch.c | 25 +++++++++++++------------
4 files changed, 30 insertions(+), 12 deletions(-)
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index d68630007b14..7733492d9a35 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -191,6 +191,8 @@ struct kvm_cpu_context {
u64 sys_regs[NR_SYS_REGS];
u32 copro[NR_COPRO_REGS];
};
+
+ struct kvm_vcpu *__hyp_running_vcpu;
};
typedef struct kvm_cpu_context kvm_cpu_context_t;
diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S
index fce7cc507e0a..e4f37b9dd47c 100644
--- a/arch/arm64/kvm/hyp/hyp-entry.S
+++ b/arch/arm64/kvm/hyp/hyp-entry.S
@@ -163,6 +163,18 @@ ENTRY(__hyp_do_panic)
eret
ENDPROC(__hyp_do_panic)
+ENTRY(__hyp_panic)
+ /*
+ * '=kvm_host_cpu_state' is a host VA from the constant pool, it may
+ * not be accessible by this address from EL2, hyp_panic() converts
+ * it with kern_hyp_va() before use.
+ */
+ ldr x0, =kvm_host_cpu_state
+ mrs x1, tpidr_el2
+ add x0, x0, x1
+ b hyp_panic
+ENDPROC(__hyp_panic)
+
.macro invalid_vector label, target = __hyp_panic
.align 2
\label:
diff --git a/arch/arm64/kvm/hyp/s2-setup.c b/arch/arm64/kvm/hyp/s2-setup.c
index b81f4091c909..eb401dbb285e 100644
--- a/arch/arm64/kvm/hyp/s2-setup.c
+++ b/arch/arm64/kvm/hyp/s2-setup.c
@@ -84,5 +84,8 @@ u32 __hyp_text __init_stage2_translation(void)
write_sysreg(val, vtcr_el2);
+ /* copy tpidr_el1 into tpidr_el2 for use by HYP */
+ write_sysreg(read_sysreg(tpidr_el1), tpidr_el2);
+
return parange;
}
diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
index 945e79c641c4..235d615cee30 100644
--- a/arch/arm64/kvm/hyp/switch.c
+++ b/arch/arm64/kvm/hyp/switch.c
@@ -286,9 +286,9 @@ int __hyp_text __kvm_vcpu_run(struct kvm_vcpu *vcpu)
u64 exit_code;
vcpu = kern_hyp_va(vcpu);
- write_sysreg(vcpu, tpidr_el2);
host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context);
+ host_ctxt->__hyp_running_vcpu = vcpu;
guest_ctxt = &vcpu->arch.ctxt;
__sysreg_save_host_state(host_ctxt);
@@ -393,7 +393,8 @@ int __hyp_text __kvm_vcpu_run(struct kvm_vcpu *vcpu)
static const char __hyp_panic_string[] = "HYP panic:\nPS:%08llx PC:%016llx ESR:%08llx\nFAR:%016llx HPFAR:%016llx PAR:%016llx\nVCPU:%p\n";
-static void __hyp_text __hyp_call_panic_nvhe(u64 spsr, u64 elr, u64 par)
+static void __hyp_text __hyp_call_panic_nvhe(u64 spsr, u64 elr, u64 par,
+ struct kvm_vcpu *vcpu)
{
unsigned long str_va;
@@ -407,35 +408,35 @@ static void __hyp_text __hyp_call_panic_nvhe(u64 spsr, u64 elr, u64 par)
__hyp_do_panic(str_va,
spsr, elr,
read_sysreg(esr_el2), read_sysreg_el2(far),
- read_sysreg(hpfar_el2), par,
- (void *)read_sysreg(tpidr_el2));
+ read_sysreg(hpfar_el2), par, vcpu);
}
-static void __hyp_text __hyp_call_panic_vhe(u64 spsr, u64 elr, u64 par)
+static void __hyp_text __hyp_call_panic_vhe(u64 spsr, u64 elr, u64 par,
+ struct kvm_vcpu *vcpu)
{
panic(__hyp_panic_string,
spsr, elr,
read_sysreg_el2(esr), read_sysreg_el2(far),
- read_sysreg(hpfar_el2), par,
- (void *)read_sysreg(tpidr_el2));
+ read_sysreg(hpfar_el2), par, vcpu);
}
static hyp_alternate_select(__hyp_call_panic,
__hyp_call_panic_nvhe, __hyp_call_panic_vhe,
ARM64_HAS_VIRT_HOST_EXTN);
-void __hyp_text __noreturn __hyp_panic(void)
+void __hyp_text __noreturn hyp_panic(struct kvm_cpu_context *__host_ctxt)
{
+ struct kvm_vcpu *vcpu = NULL;
+
u64 spsr = read_sysreg_el2(spsr);
u64 elr = read_sysreg_el2(elr);
u64 par = read_sysreg(par_el1);
if (read_sysreg(vttbr_el2)) {
- struct kvm_vcpu *vcpu;
struct kvm_cpu_context *host_ctxt;
- vcpu = (struct kvm_vcpu *)read_sysreg(tpidr_el2);
- host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context);
+ host_ctxt = kern_hyp_va(__host_ctxt);
+ vcpu = host_ctxt->__hyp_running_vcpu;
__timer_save_state(vcpu);
__deactivate_traps(vcpu);
__deactivate_vm(vcpu);
@@ -443,7 +444,7 @@ void __hyp_text __noreturn __hyp_panic(void)
}
/* Call panic for real */
- __hyp_call_panic()(spsr, elr, par);
+ __hyp_call_panic()(spsr, elr, par, vcpu);
unreachable();
}
--
2.13.3
^ permalink raw reply related [flat|nested] 38+ messages in thread
[parent not found: <20170808164616.25949-4-james.morse-5wv7dgnIgG8@public.gmane.org>]
* Re: [PATCH v2 03/11] KVM: arm64: Change hyp_panic()'s dependency on tpidr_el2
2017-08-08 16:46 ` James Morse
@ 2017-09-17 14:43 ` Christoffer Dall
-1 siblings, 0 replies; 38+ messages in thread
From: Christoffer Dall @ 2017-09-17 14:43 UTC (permalink / raw)
To: James Morse
Cc: linux-arm-kernel-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r,
kvmarm-FPEHb7Xf0XXUo1n7N8X6UoWGPAHP3yOg,
devicetree-u79uwXL29TY76Z2rM5mHXA, Will Deacon, Catalin Marinas,
Marc Zyngier, Christoffer Dall, Mark Rutland, Rob Herring,
Loc Ho
On Tue, Aug 08, 2017 at 05:46:08PM +0100, James Morse wrote:
> Make tpidr_el2 a cpu-offset for per-cpu variables in the same way the
> host uses tpidr_el1. This lets tpidr_el{1,2} have the same value, and
> on VHE they can be the same register.
>
> KVM calls hyp_panic() when anything unexpected happens. This may occur
> while a guest owns the EL1 registers. KVM stashes the vcpu pointer in
> tpidr_el2, which it uses to find the host context in order to restore
> the host EL1 registers before parachuting into the host's panic().
>
> The host context is a struct kvm_cpu_context allocated in the per-cpu
> area, and mapped to hyp. Given the per-cpu offset for this CPU, this is
> easy to find. Change hyp_panic() to take a pointer to the
> struct kvm_cpu_context. Wrap these calls with an asm function that
> retrieves the struct kvm_cpu_context from the host's per-cpu area.
>
> Copy the per-cpu offset from the host's tpidr_el1 into tpidr_el2 during
> kvm init. (Later patches will make this unnecessary for VHE hosts)
>
> We print out the vcpu pointer as part of the panic message. Add a back
> reference to the 'running vcpu' in the host cpu context to preserve this.
>
> Signed-off-by: James Morse <james.morse-5wv7dgnIgG8@public.gmane.org>
> ---
> Changes since v1:
> * Added a comment explaining how =kvm_host_cpu_state gets from a host-va
> to a hyp va.
> * Added the first paragraph to the commit message.
>
> arch/arm64/include/asm/kvm_host.h | 2 ++
> arch/arm64/kvm/hyp/hyp-entry.S | 12 ++++++++++++
> arch/arm64/kvm/hyp/s2-setup.c | 3 +++
> arch/arm64/kvm/hyp/switch.c | 25 +++++++++++++------------
> 4 files changed, 30 insertions(+), 12 deletions(-)
>
> diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
> index d68630007b14..7733492d9a35 100644
> --- a/arch/arm64/include/asm/kvm_host.h
> +++ b/arch/arm64/include/asm/kvm_host.h
> @@ -191,6 +191,8 @@ struct kvm_cpu_context {
> u64 sys_regs[NR_SYS_REGS];
> u32 copro[NR_COPRO_REGS];
> };
> +
> + struct kvm_vcpu *__hyp_running_vcpu;
> };
>
> typedef struct kvm_cpu_context kvm_cpu_context_t;
> diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S
> index fce7cc507e0a..e4f37b9dd47c 100644
> --- a/arch/arm64/kvm/hyp/hyp-entry.S
> +++ b/arch/arm64/kvm/hyp/hyp-entry.S
> @@ -163,6 +163,18 @@ ENTRY(__hyp_do_panic)
> eret
> ENDPROC(__hyp_do_panic)
>
> +ENTRY(__hyp_panic)
> + /*
> + * '=kvm_host_cpu_state' is a host VA from the constant pool, it may
> + * not be accessible by this address from EL2, hyp_panic() converts
> + * it with kern_hyp_va() before use.
> + */
> + ldr x0, =kvm_host_cpu_state
> + mrs x1, tpidr_el2
> + add x0, x0, x1
> + b hyp_panic
> +ENDPROC(__hyp_panic)
> +
> .macro invalid_vector label, target = __hyp_panic
> .align 2
> \label:
> diff --git a/arch/arm64/kvm/hyp/s2-setup.c b/arch/arm64/kvm/hyp/s2-setup.c
> index b81f4091c909..eb401dbb285e 100644
> --- a/arch/arm64/kvm/hyp/s2-setup.c
> +++ b/arch/arm64/kvm/hyp/s2-setup.c
> @@ -84,5 +84,8 @@ u32 __hyp_text __init_stage2_translation(void)
>
> write_sysreg(val, vtcr_el2);
>
> + /* copy tpidr_el1 into tpidr_el2 for use by HYP */
> + write_sysreg(read_sysreg(tpidr_el1), tpidr_el2);
> +
> return parange;
> }
> diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
> index 945e79c641c4..235d615cee30 100644
> --- a/arch/arm64/kvm/hyp/switch.c
> +++ b/arch/arm64/kvm/hyp/switch.c
> @@ -286,9 +286,9 @@ int __hyp_text __kvm_vcpu_run(struct kvm_vcpu *vcpu)
> u64 exit_code;
>
> vcpu = kern_hyp_va(vcpu);
> - write_sysreg(vcpu, tpidr_el2);
>
> host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context);
> + host_ctxt->__hyp_running_vcpu = vcpu;
I'm fine with this for now, but eventually when we optimize KVM further
we may want to avoid doing this in every world-switch. One option is to
just set this pointer in vcpu_get and vcpu_load, but would it also be
possible to use the kvm_arm_running_vcpu per-cpu array directly?
> guest_ctxt = &vcpu->arch.ctxt;
>
> __sysreg_save_host_state(host_ctxt);
> @@ -393,7 +393,8 @@ int __hyp_text __kvm_vcpu_run(struct kvm_vcpu *vcpu)
>
> static const char __hyp_panic_string[] = "HYP panic:\nPS:%08llx PC:%016llx ESR:%08llx\nFAR:%016llx HPFAR:%016llx PAR:%016llx\nVCPU:%p\n";
>
> -static void __hyp_text __hyp_call_panic_nvhe(u64 spsr, u64 elr, u64 par)
> +static void __hyp_text __hyp_call_panic_nvhe(u64 spsr, u64 elr, u64 par,
> + struct kvm_vcpu *vcpu)
> {
> unsigned long str_va;
>
> @@ -407,35 +408,35 @@ static void __hyp_text __hyp_call_panic_nvhe(u64 spsr, u64 elr, u64 par)
> __hyp_do_panic(str_va,
> spsr, elr,
> read_sysreg(esr_el2), read_sysreg_el2(far),
> - read_sysreg(hpfar_el2), par,
> - (void *)read_sysreg(tpidr_el2));
> + read_sysreg(hpfar_el2), par, vcpu);
> }
>
> -static void __hyp_text __hyp_call_panic_vhe(u64 spsr, u64 elr, u64 par)
> +static void __hyp_text __hyp_call_panic_vhe(u64 spsr, u64 elr, u64 par,
> + struct kvm_vcpu *vcpu)
> {
> panic(__hyp_panic_string,
> spsr, elr,
> read_sysreg_el2(esr), read_sysreg_el2(far),
> - read_sysreg(hpfar_el2), par,
> - (void *)read_sysreg(tpidr_el2));
> + read_sysreg(hpfar_el2), par, vcpu);
> }
>
> static hyp_alternate_select(__hyp_call_panic,
> __hyp_call_panic_nvhe, __hyp_call_panic_vhe,
> ARM64_HAS_VIRT_HOST_EXTN);
>
> -void __hyp_text __noreturn __hyp_panic(void)
> +void __hyp_text __noreturn hyp_panic(struct kvm_cpu_context *__host_ctxt)
> {
> + struct kvm_vcpu *vcpu = NULL;
> +
> u64 spsr = read_sysreg_el2(spsr);
> u64 elr = read_sysreg_el2(elr);
> u64 par = read_sysreg(par_el1);
>
> if (read_sysreg(vttbr_el2)) {
> - struct kvm_vcpu *vcpu;
> struct kvm_cpu_context *host_ctxt;
>
> - vcpu = (struct kvm_vcpu *)read_sysreg(tpidr_el2);
> - host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context);
> + host_ctxt = kern_hyp_va(__host_ctxt);
> + vcpu = host_ctxt->__hyp_running_vcpu;
> __timer_save_state(vcpu);
> __deactivate_traps(vcpu);
> __deactivate_vm(vcpu);
> @@ -443,7 +444,7 @@ void __hyp_text __noreturn __hyp_panic(void)
> }
>
> /* Call panic for real */
> - __hyp_call_panic()(spsr, elr, par);
> + __hyp_call_panic()(spsr, elr, par, vcpu);
>
> unreachable();
> }
> --
> 2.13.3
>
Reviewed-by: Christoffer Dall <cdall-QSEj5FYQhm4dnm+yROfE0A@public.gmane.org>
--
To unsubscribe from this list: send the line "unsubscribe devicetree" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply [flat|nested] 38+ messages in thread
* [PATCH v2 03/11] KVM: arm64: Change hyp_panic()'s dependency on tpidr_el2
@ 2017-09-17 14:43 ` Christoffer Dall
0 siblings, 0 replies; 38+ messages in thread
From: Christoffer Dall @ 2017-09-17 14:43 UTC (permalink / raw)
To: linux-arm-kernel
On Tue, Aug 08, 2017 at 05:46:08PM +0100, James Morse wrote:
> Make tpidr_el2 a cpu-offset for per-cpu variables in the same way the
> host uses tpidr_el1. This lets tpidr_el{1,2} have the same value, and
> on VHE they can be the same register.
>
> KVM calls hyp_panic() when anything unexpected happens. This may occur
> while a guest owns the EL1 registers. KVM stashes the vcpu pointer in
> tpidr_el2, which it uses to find the host context in order to restore
> the host EL1 registers before parachuting into the host's panic().
>
> The host context is a struct kvm_cpu_context allocated in the per-cpu
> area, and mapped to hyp. Given the per-cpu offset for this CPU, this is
> easy to find. Change hyp_panic() to take a pointer to the
> struct kvm_cpu_context. Wrap these calls with an asm function that
> retrieves the struct kvm_cpu_context from the host's per-cpu area.
>
> Copy the per-cpu offset from the hosts tpidr_el1 into tpidr_el2 during
> kvm init. (Later patches will make this unnecessary for VHE hosts)
>
> We print out the vcpu pointer as part of the panic message. Add a back
> reference to the 'running vcpu' in the host cpu context to preserve this.
>
> Signed-off-by: James Morse <james.morse@arm.com>
> ---
> Changes since v1:
> * Added a comment explaining how =kvm_host_cpu_state gets from a host-va
> to a hyp va.
> * Added the first paragraph to the commit message.
>
> arch/arm64/include/asm/kvm_host.h | 2 ++
> arch/arm64/kvm/hyp/hyp-entry.S | 12 ++++++++++++
> arch/arm64/kvm/hyp/s2-setup.c | 3 +++
> arch/arm64/kvm/hyp/switch.c | 25 +++++++++++++------------
> 4 files changed, 30 insertions(+), 12 deletions(-)
>
> diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
> index d68630007b14..7733492d9a35 100644
> --- a/arch/arm64/include/asm/kvm_host.h
> +++ b/arch/arm64/include/asm/kvm_host.h
> @@ -191,6 +191,8 @@ struct kvm_cpu_context {
> u64 sys_regs[NR_SYS_REGS];
> u32 copro[NR_COPRO_REGS];
> };
> +
> + struct kvm_vcpu *__hyp_running_vcpu;
> };
>
> typedef struct kvm_cpu_context kvm_cpu_context_t;
> diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S
> index fce7cc507e0a..e4f37b9dd47c 100644
> --- a/arch/arm64/kvm/hyp/hyp-entry.S
> +++ b/arch/arm64/kvm/hyp/hyp-entry.S
> @@ -163,6 +163,18 @@ ENTRY(__hyp_do_panic)
> eret
> ENDPROC(__hyp_do_panic)
>
> +ENTRY(__hyp_panic)
> + /*
> + * '=kvm_host_cpu_state' is a host VA from the constant pool, it may
> + * not be accessible by this address from EL2, hyp_panic() converts
> + * it with kern_hyp_va() before use.
> + */
> + ldr x0, =kvm_host_cpu_state
> + mrs x1, tpidr_el2
> + add x0, x0, x1
> + b hyp_panic
> +ENDPROC(__hyp_panic)
> +
> .macro invalid_vector label, target = __hyp_panic
> .align 2
> \label:
> diff --git a/arch/arm64/kvm/hyp/s2-setup.c b/arch/arm64/kvm/hyp/s2-setup.c
> index b81f4091c909..eb401dbb285e 100644
> --- a/arch/arm64/kvm/hyp/s2-setup.c
> +++ b/arch/arm64/kvm/hyp/s2-setup.c
> @@ -84,5 +84,8 @@ u32 __hyp_text __init_stage2_translation(void)
>
> write_sysreg(val, vtcr_el2);
>
> + /* copy tpidr_el1 into tpidr_el2 for use by HYP */
> + write_sysreg(read_sysreg(tpidr_el1), tpidr_el2);
> +
> return parange;
> }
> diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
> index 945e79c641c4..235d615cee30 100644
> --- a/arch/arm64/kvm/hyp/switch.c
> +++ b/arch/arm64/kvm/hyp/switch.c
> @@ -286,9 +286,9 @@ int __hyp_text __kvm_vcpu_run(struct kvm_vcpu *vcpu)
> u64 exit_code;
>
> vcpu = kern_hyp_va(vcpu);
> - write_sysreg(vcpu, tpidr_el2);
>
> host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context);
> + host_ctxt->__hyp_running_vcpu = vcpu;
I'm fine with this for now, but eventually when we optimize KVM further
we may want to avoid doing this in every world-switch. One option is to
just set this pointer in vcpu_get and vcpu_load, but would it also be
possible to use the kvm_arm_running_vcpu per-cpu array directly?
> guest_ctxt = &vcpu->arch.ctxt;
>
> __sysreg_save_host_state(host_ctxt);
> @@ -393,7 +393,8 @@ int __hyp_text __kvm_vcpu_run(struct kvm_vcpu *vcpu)
>
> static const char __hyp_panic_string[] = "HYP panic:\nPS:%08llx PC:%016llx ESR:%08llx\nFAR:%016llx HPFAR:%016llx PAR:%016llx\nVCPU:%p\n";
>
> -static void __hyp_text __hyp_call_panic_nvhe(u64 spsr, u64 elr, u64 par)
> +static void __hyp_text __hyp_call_panic_nvhe(u64 spsr, u64 elr, u64 par,
> + struct kvm_vcpu *vcpu)
> {
> unsigned long str_va;
>
> @@ -407,35 +408,35 @@ static void __hyp_text __hyp_call_panic_nvhe(u64 spsr, u64 elr, u64 par)
> __hyp_do_panic(str_va,
> spsr, elr,
> read_sysreg(esr_el2), read_sysreg_el2(far),
> - read_sysreg(hpfar_el2), par,
> - (void *)read_sysreg(tpidr_el2));
> + read_sysreg(hpfar_el2), par, vcpu);
> }
>
> -static void __hyp_text __hyp_call_panic_vhe(u64 spsr, u64 elr, u64 par)
> +static void __hyp_text __hyp_call_panic_vhe(u64 spsr, u64 elr, u64 par,
> + struct kvm_vcpu *vcpu)
> {
> panic(__hyp_panic_string,
> spsr, elr,
> read_sysreg_el2(esr), read_sysreg_el2(far),
> - read_sysreg(hpfar_el2), par,
> - (void *)read_sysreg(tpidr_el2));
> + read_sysreg(hpfar_el2), par, vcpu);
> }
>
> static hyp_alternate_select(__hyp_call_panic,
> __hyp_call_panic_nvhe, __hyp_call_panic_vhe,
> ARM64_HAS_VIRT_HOST_EXTN);
>
> -void __hyp_text __noreturn __hyp_panic(void)
> +void __hyp_text __noreturn hyp_panic(struct kvm_cpu_context *__host_ctxt)
> {
> + struct kvm_vcpu *vcpu = NULL;
> +
> u64 spsr = read_sysreg_el2(spsr);
> u64 elr = read_sysreg_el2(elr);
> u64 par = read_sysreg(par_el1);
>
> if (read_sysreg(vttbr_el2)) {
> - struct kvm_vcpu *vcpu;
> struct kvm_cpu_context *host_ctxt;
>
> - vcpu = (struct kvm_vcpu *)read_sysreg(tpidr_el2);
> - host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context);
> + host_ctxt = kern_hyp_va(__host_ctxt);
> + vcpu = host_ctxt->__hyp_running_vcpu;
> __timer_save_state(vcpu);
> __deactivate_traps(vcpu);
> __deactivate_vm(vcpu);
> @@ -443,7 +444,7 @@ void __hyp_text __noreturn __hyp_panic(void)
> }
>
> /* Call panic for real */
> - __hyp_call_panic()(spsr, elr, par);
> + __hyp_call_panic()(spsr, elr, par, vcpu);
>
> unreachable();
> }
> --
> 2.13.3
>
Reviewed-by: Christoffer Dall <cdall@linaro.org>
^ permalink raw reply [flat|nested] 38+ messages in thread
* Re: [PATCH v2 03/11] KVM: arm64: Change hyp_panic()'s dependency on tpidr_el2
2017-09-17 14:43 ` Christoffer Dall
@ 2017-09-22 10:53 ` James Morse
-1 siblings, 0 replies; 38+ messages in thread
From: James Morse @ 2017-09-22 10:53 UTC (permalink / raw)
To: Christoffer Dall
Cc: linux-arm-kernel-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r,
kvmarm-FPEHb7Xf0XXUo1n7N8X6UoWGPAHP3yOg,
devicetree-u79uwXL29TY76Z2rM5mHXA, Will Deacon, Catalin Marinas,
Marc Zyngier, Christoffer Dall, Mark Rutland, Rob Herring,
Loc Ho
Hi Christoffer,
On 17/09/17 15:43, Christoffer Dall wrote:
> On Tue, Aug 08, 2017 at 05:46:08PM +0100, James Morse wrote:
>> Make tpidr_el2 a cpu-offset for per-cpu variables in the same way the
>> host uses tpidr_el1. This lets tpidr_el{1,2} have the same value, and
>> on VHE they can be the same register.
>>
>> KVM calls hyp_panic() when anything unexpected happens. This may occur
>> while a guest owns the EL1 registers. KVM stashes the vcpu pointer in
>> tpidr_el2, which it uses to find the host context in order to restore
>> the host EL1 registers before parachuting into the host's panic().
>>
>> The host context is a struct kvm_cpu_context allocated in the per-cpu
>> area, and mapped to hyp. Given the per-cpu offset for this CPU, this is
>> easy to find. Change hyp_panic() to take a pointer to the
>> struct kvm_cpu_context. Wrap these calls with an asm function that
>> retrieves the struct kvm_cpu_context from the host's per-cpu area.
>>
>> Copy the per-cpu offset from the hosts tpidr_el1 into tpidr_el2 during
>> kvm init. (Later patches will make this unnecessary for VHE hosts)
>>
>> We print out the vcpu pointer as part of the panic message. Add a back
>> reference to the 'running vcpu' in the host cpu context to preserve this.
>> diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
>> index 945e79c641c4..235d615cee30 100644
>> --- a/arch/arm64/kvm/hyp/switch.c
>> +++ b/arch/arm64/kvm/hyp/switch.c
>> @@ -286,9 +286,9 @@ int __hyp_text __kvm_vcpu_run(struct kvm_vcpu *vcpu)
>> u64 exit_code;
>>
>> vcpu = kern_hyp_va(vcpu);
>> - write_sysreg(vcpu, tpidr_el2);
>>
>> host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context);
>> + host_ctxt->__hyp_running_vcpu = vcpu;
>
> I'm fine with this for now, but eventually when we optimize KVM further
> we may want to avoid doing this in every world-switch. One option is to
> just set this pointer in vcpu_get and vcpu_load, but would it also be
> possible to use the kvm_arm_running_vcpu per-cpu array directly?
Yes, that would have been better, I didn't know that existed...
After this point we can find per-cpu variables easily, they just need mapping to
HYP.
This pointer is just for the panic message, I'm not sure how useful it is.
For kdump the kvm_arm_running_vcpu array holds the same information, so is
printing this out just so we can spot if it's totally bogus?
As you're fine with this for now, I'll put tidying it up on my todo list...
>> guest_ctxt = &vcpu->arch.ctxt;
>>
>> __sysreg_save_host_state(host_ctxt);
> Reviewed-by: Christoffer Dall <cdall-QSEj5FYQhm4dnm+yROfE0A@public.gmane.org>
Thanks!
James
--
To unsubscribe from this list: send the line "unsubscribe devicetree" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply [flat|nested] 38+ messages in thread
* [PATCH v2 03/11] KVM: arm64: Change hyp_panic()s dependency on tpidr_el2
@ 2017-09-22 10:53 ` James Morse
0 siblings, 0 replies; 38+ messages in thread
From: James Morse @ 2017-09-22 10:53 UTC (permalink / raw)
To: linux-arm-kernel
Hi Christoffer,
On 17/09/17 15:43, Christoffer Dall wrote:
> On Tue, Aug 08, 2017 at 05:46:08PM +0100, James Morse wrote:
>> Make tpidr_el2 a cpu-offset for per-cpu variables in the same way the
>> host uses tpidr_el1. This lets tpidr_el{1,2} have the same value, and
>> on VHE they can be the same register.
>>
>> KVM calls hyp_panic() when anything unexpected happens. This may occur
>> while a guest owns the EL1 registers. KVM stashes the vcpu pointer in
>> tpidr_el2, which it uses to find the host context in order to restore
>> the host EL1 registers before parachuting into the host's panic().
>>
>> The host context is a struct kvm_cpu_context allocated in the per-cpu
>> area, and mapped to hyp. Given the per-cpu offset for this CPU, this is
>> easy to find. Change hyp_panic() to take a pointer to the
>> struct kvm_cpu_context. Wrap these calls with an asm function that
>> retrieves the struct kvm_cpu_context from the host's per-cpu area.
>>
>> Copy the per-cpu offset from the hosts tpidr_el1 into tpidr_el2 during
>> kvm init. (Later patches will make this unnecessary for VHE hosts)
>>
>> We print out the vcpu pointer as part of the panic message. Add a back
>> reference to the 'running vcpu' in the host cpu context to preserve this.
>> diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
>> index 945e79c641c4..235d615cee30 100644
>> --- a/arch/arm64/kvm/hyp/switch.c
>> +++ b/arch/arm64/kvm/hyp/switch.c
>> @@ -286,9 +286,9 @@ int __hyp_text __kvm_vcpu_run(struct kvm_vcpu *vcpu)
>> u64 exit_code;
>>
>> vcpu = kern_hyp_va(vcpu);
>> - write_sysreg(vcpu, tpidr_el2);
>>
>> host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context);
>> + host_ctxt->__hyp_running_vcpu = vcpu;
>
> I'm fine with this for now, but eventually when we optimize KVM further
> we may want to avoid doing this in every world-switch. One option is to
> just set this pointer in vcpu_get and vcpu_load, but would it also be
> possible to use the kvm_arm_running_vcpu per-cpu array directly?
Yes, that would have been better, I didn't know that existed...
After this point we can find per-cpu variables easily, they just need mapping to
HYP.
This pointer is just for the panic message, I'm not sure how useful it is.
For kdump the kvm_arm_running_vcpu array holds the same information, so is
printing this out just so we can spot if it's totally bogus?
As you're fine with this for now, I'll put tidying it up on my todo list...
>> guest_ctxt = &vcpu->arch.ctxt;
>>
>> __sysreg_save_host_state(host_ctxt);
> Reviewed-by: Christoffer Dall <cdall@linaro.org>
Thanks!
James
^ permalink raw reply [flat|nested] 38+ messages in thread
* [PATCH v2 04/11] arm64: alternatives: use tpidr_el2 on VHE hosts
2017-08-08 16:46 ` James Morse
@ 2017-08-08 16:46 ` James Morse
-1 siblings, 0 replies; 38+ messages in thread
From: James Morse @ 2017-08-08 16:46 UTC (permalink / raw)
To: linux-arm-kernel-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r
Cc: kvmarm-FPEHb7Xf0XXUo1n7N8X6UoWGPAHP3yOg,
devicetree-u79uwXL29TY76Z2rM5mHXA, Will Deacon, Catalin Marinas,
Marc Zyngier, Christoffer Dall, Mark Rutland, Rob Herring,
Loc Ho, James Morse
Now that KVM uses tpidr_el2 in the same way as Linux's cpu_offset in
tpidr_el1, merge the two. This saves KVM from save/restoring tpidr_el1
on VHE hosts, and allows future code to blindly access per-cpu variables
without triggering world-switch.
Signed-off-by: James Morse <james.morse-5wv7dgnIgG8@public.gmane.org>
---
Changes since v1:
* cpu_copy_el2regs()'s 'have I been patched' test now always sets a register,
just in case the compiler puts do_copyregs on the stack.
arch/arm64/include/asm/assembler.h | 8 ++++++++
arch/arm64/include/asm/percpu.h | 11 +++++++++--
arch/arm64/include/asm/processor.h | 1 +
arch/arm64/kernel/cpufeature.c | 23 +++++++++++++++++++++++
arch/arm64/mm/proc.S | 8 ++++++++
5 files changed, 49 insertions(+), 2 deletions(-)
diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index 1b67c3782d00..06252602901e 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -236,7 +236,11 @@ lr .req x30 // link register
*/
.macro adr_this_cpu, dst, sym, tmp
adr_l \dst, \sym
+alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
mrs \tmp, tpidr_el1
+alternative_else
+ mrs \tmp, tpidr_el2
+alternative_endif
add \dst, \dst, \tmp
.endm
@@ -247,7 +251,11 @@ lr .req x30 // link register
*/
.macro ldr_this_cpu dst, sym, tmp
adr_l \dst, \sym
+alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
mrs \tmp, tpidr_el1
+alternative_else
+ mrs \tmp, tpidr_el2
+alternative_endif
ldr \dst, [\dst, \tmp]
.endm
diff --git a/arch/arm64/include/asm/percpu.h b/arch/arm64/include/asm/percpu.h
index 3bd498e4de4c..43393208229e 100644
--- a/arch/arm64/include/asm/percpu.h
+++ b/arch/arm64/include/asm/percpu.h
@@ -16,11 +16,15 @@
#ifndef __ASM_PERCPU_H
#define __ASM_PERCPU_H
+#include <asm/alternative.h>
#include <asm/stack_pointer.h>
static inline void set_my_cpu_offset(unsigned long off)
{
- asm volatile("msr tpidr_el1, %0" :: "r" (off) : "memory");
+ asm volatile(ALTERNATIVE("msr tpidr_el1, %0",
+ "msr tpidr_el2, %0",
+ ARM64_HAS_VIRT_HOST_EXTN)
+ :: "r" (off) : "memory");
}
static inline unsigned long __my_cpu_offset(void)
@@ -31,7 +35,10 @@ static inline unsigned long __my_cpu_offset(void)
* We want to allow caching the value, so avoid using volatile and
* instead use a fake stack read to hazard against barrier().
*/
- asm("mrs %0, tpidr_el1" : "=r" (off) :
+ asm(ALTERNATIVE("mrs %0, tpidr_el1",
+ "mrs %0, tpidr_el2",
+ ARM64_HAS_VIRT_HOST_EXTN)
+ : "=r" (off) :
"Q" (*(const unsigned long *)current_stack_pointer));
return off;
diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index 64c9e78f9882..6d4fdfccf869 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -193,5 +193,6 @@ static inline void spin_lock_prefetch(const void *ptr)
int cpu_enable_pan(void *__unused);
int cpu_enable_cache_maint_trap(void *__unused);
+int cpu_copy_el2regs(void *__unused);
#endif /* __ASM_PROCESSOR_H */
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 9f9e0064c8c1..1960706d2422 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -864,6 +864,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.capability = ARM64_HAS_VIRT_HOST_EXTN,
.def_scope = SCOPE_SYSTEM,
.matches = runs_at_el2,
+ .enable = cpu_copy_el2regs,
},
{
.desc = "32-bit EL0 Support",
@@ -1295,3 +1296,25 @@ static int __init enable_mrs_emulation(void)
}
late_initcall(enable_mrs_emulation);
+
+int cpu_copy_el2regs(void *__unused)
+{
+ int do_copyregs = 0;
+
+ /*
+ * Copy register values that aren't redirected by hardware.
+ *
+ * Before code patching, we only set tpidr_el1, all CPUs need to copy
+ * this value to tpidr_el2 before we patch the code. Once we've done
+ * that, freshly-onlined CPUs will set tpidr_el2, so we don't need to
+ * do anything here.
+ */
+ asm volatile(ALTERNATIVE("mov %0, #1", "mov %0, #0",
+ ARM64_HAS_VIRT_HOST_EXTN)
+ : "=r" (do_copyregs) : : );
+
+ if (do_copyregs)
+ write_sysreg(read_sysreg(tpidr_el1), tpidr_el2);
+
+ return 0;
+}
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index 877d42fb0df6..109d43a15aaf 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -70,7 +70,11 @@ ENTRY(cpu_do_suspend)
mrs x8, mdscr_el1
mrs x9, oslsr_el1
mrs x10, sctlr_el1
+alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
mrs x11, tpidr_el1
+alternative_else
+ mrs x11, tpidr_el2
+alternative_endif
mrs x12, sp_el0
stp x2, x3, [x0]
stp x4, xzr, [x0, #16]
@@ -116,7 +120,11 @@ ENTRY(cpu_do_resume)
msr mdscr_el1, x10
msr sctlr_el1, x12
+alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
msr tpidr_el1, x13
+alternative_else
+ msr tpidr_el2, x13
+alternative_endif
msr sp_el0, x14
/*
* Restore oslsr_el1 by writing oslar_el1
--
2.13.3
--
To unsubscribe from this list: send the line "unsubscribe devicetree" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply related [flat|nested] 38+ messages in thread
* [PATCH v2 04/11] arm64: alternatives: use tpidr_el2 on VHE hosts
@ 2017-08-08 16:46 ` James Morse
0 siblings, 0 replies; 38+ messages in thread
From: James Morse @ 2017-08-08 16:46 UTC (permalink / raw)
To: linux-arm-kernel
Now that KVM uses tpidr_el2 in the same way as Linux's cpu_offset in
tpidr_el1, merge the two. This saves KVM from save/restoring tpidr_el1
on VHE hosts, and allows future code to blindly access per-cpu variables
without triggering world-switch.
Signed-off-by: James Morse <james.morse@arm.com>
---
Changes since v1:
* cpu_copy_el2regs()'s 'have I been patched' test now always sets a register,
just in case the compiler puts do_copyregs on the stack.
arch/arm64/include/asm/assembler.h | 8 ++++++++
arch/arm64/include/asm/percpu.h | 11 +++++++++--
arch/arm64/include/asm/processor.h | 1 +
arch/arm64/kernel/cpufeature.c | 23 +++++++++++++++++++++++
arch/arm64/mm/proc.S | 8 ++++++++
5 files changed, 49 insertions(+), 2 deletions(-)
diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index 1b67c3782d00..06252602901e 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -236,7 +236,11 @@ lr .req x30 // link register
*/
.macro adr_this_cpu, dst, sym, tmp
adr_l \dst, \sym
+alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
mrs \tmp, tpidr_el1
+alternative_else
+ mrs \tmp, tpidr_el2
+alternative_endif
add \dst, \dst, \tmp
.endm
@@ -247,7 +251,11 @@ lr .req x30 // link register
*/
.macro ldr_this_cpu dst, sym, tmp
adr_l \dst, \sym
+alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
mrs \tmp, tpidr_el1
+alternative_else
+ mrs \tmp, tpidr_el2
+alternative_endif
ldr \dst, [\dst, \tmp]
.endm
diff --git a/arch/arm64/include/asm/percpu.h b/arch/arm64/include/asm/percpu.h
index 3bd498e4de4c..43393208229e 100644
--- a/arch/arm64/include/asm/percpu.h
+++ b/arch/arm64/include/asm/percpu.h
@@ -16,11 +16,15 @@
#ifndef __ASM_PERCPU_H
#define __ASM_PERCPU_H
+#include <asm/alternative.h>
#include <asm/stack_pointer.h>
static inline void set_my_cpu_offset(unsigned long off)
{
- asm volatile("msr tpidr_el1, %0" :: "r" (off) : "memory");
+ asm volatile(ALTERNATIVE("msr tpidr_el1, %0",
+ "msr tpidr_el2, %0",
+ ARM64_HAS_VIRT_HOST_EXTN)
+ :: "r" (off) : "memory");
}
static inline unsigned long __my_cpu_offset(void)
@@ -31,7 +35,10 @@ static inline unsigned long __my_cpu_offset(void)
* We want to allow caching the value, so avoid using volatile and
* instead use a fake stack read to hazard against barrier().
*/
- asm("mrs %0, tpidr_el1" : "=r" (off) :
+ asm(ALTERNATIVE("mrs %0, tpidr_el1",
+ "mrs %0, tpidr_el2",
+ ARM64_HAS_VIRT_HOST_EXTN)
+ : "=r" (off) :
"Q" (*(const unsigned long *)current_stack_pointer));
return off;
diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index 64c9e78f9882..6d4fdfccf869 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -193,5 +193,6 @@ static inline void spin_lock_prefetch(const void *ptr)
int cpu_enable_pan(void *__unused);
int cpu_enable_cache_maint_trap(void *__unused);
+int cpu_copy_el2regs(void *__unused);
#endif /* __ASM_PROCESSOR_H */
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 9f9e0064c8c1..1960706d2422 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -864,6 +864,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.capability = ARM64_HAS_VIRT_HOST_EXTN,
.def_scope = SCOPE_SYSTEM,
.matches = runs_at_el2,
+ .enable = cpu_copy_el2regs,
},
{
.desc = "32-bit EL0 Support",
@@ -1295,3 +1296,25 @@ static int __init enable_mrs_emulation(void)
}
late_initcall(enable_mrs_emulation);
+
+int cpu_copy_el2regs(void *__unused)
+{
+ int do_copyregs = 0;
+
+ /*
+ * Copy register values that aren't redirected by hardware.
+ *
+ * Before code patching, we only set tpidr_el1, all CPUs need to copy
+ * this value to tpidr_el2 before we patch the code. Once we've done
+ * that, freshly-onlined CPUs will set tpidr_el2, so we don't need to
+ * do anything here.
+ */
+ asm volatile(ALTERNATIVE("mov %0, #1", "mov %0, #0",
+ ARM64_HAS_VIRT_HOST_EXTN)
+ : "=r" (do_copyregs) : : );
+
+ if (do_copyregs)
+ write_sysreg(read_sysreg(tpidr_el1), tpidr_el2);
+
+ return 0;
+}
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index 877d42fb0df6..109d43a15aaf 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -70,7 +70,11 @@ ENTRY(cpu_do_suspend)
mrs x8, mdscr_el1
mrs x9, oslsr_el1
mrs x10, sctlr_el1
+alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
mrs x11, tpidr_el1
+alternative_else
+ mrs x11, tpidr_el2
+alternative_endif
mrs x12, sp_el0
stp x2, x3, [x0]
stp x4, xzr, [x0, #16]
@@ -116,7 +120,11 @@ ENTRY(cpu_do_resume)
msr mdscr_el1, x10
msr sctlr_el1, x12
+alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
msr tpidr_el1, x13
+alternative_else
+ msr tpidr_el2, x13
+alternative_endif
msr sp_el0, x14
/*
* Restore oslsr_el1 by writing oslar_el1
--
2.13.3
^ permalink raw reply related [flat|nested] 38+ messages in thread
[parent not found: <20170808164616.25949-5-james.morse-5wv7dgnIgG8@public.gmane.org>]
* Re: [PATCH v2 04/11] arm64: alternatives: use tpidr_el2 on VHE hosts
2017-08-08 16:46 ` James Morse
@ 2017-09-17 14:43 ` Christoffer Dall
-1 siblings, 0 replies; 38+ messages in thread
From: Christoffer Dall @ 2017-09-17 14:43 UTC (permalink / raw)
To: James Morse
Cc: linux-arm-kernel-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r,
kvmarm-FPEHb7Xf0XXUo1n7N8X6UoWGPAHP3yOg,
devicetree-u79uwXL29TY76Z2rM5mHXA, Will Deacon, Catalin Marinas,
Marc Zyngier, Christoffer Dall, Mark Rutland, Rob Herring,
Loc Ho
On Tue, Aug 08, 2017 at 05:46:09PM +0100, James Morse wrote:
> Now that KVM uses tpidr_el2 in the same way as Linux's cpu_offset in
> tpidr_el1, merge the two. This saves KVM from save/restoring tpidr_el1
> on VHE hosts, and allows future code to blindly access per-cpu variables
> without triggering world-switch.
>
> Signed-off-by: James Morse <james.morse-5wv7dgnIgG8@public.gmane.org>
> ---
> Changes since v1:
> * cpu_copy_el2regs()'s 'have I been patched' test now always sets a register,
> just in case the compiler puts do_copyregs on the stack.
I don't understand this?
>
> arch/arm64/include/asm/assembler.h | 8 ++++++++
> arch/arm64/include/asm/percpu.h | 11 +++++++++--
> arch/arm64/include/asm/processor.h | 1 +
> arch/arm64/kernel/cpufeature.c | 23 +++++++++++++++++++++++
> arch/arm64/mm/proc.S | 8 ++++++++
> 5 files changed, 49 insertions(+), 2 deletions(-)
>
> diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
> index 1b67c3782d00..06252602901e 100644
> --- a/arch/arm64/include/asm/assembler.h
> +++ b/arch/arm64/include/asm/assembler.h
> @@ -236,7 +236,11 @@ lr .req x30 // link register
> */
> .macro adr_this_cpu, dst, sym, tmp
> adr_l \dst, \sym
> +alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
> mrs \tmp, tpidr_el1
> +alternative_else
> + mrs \tmp, tpidr_el2
> +alternative_endif
> add \dst, \dst, \tmp
> .endm
>
> @@ -247,7 +251,11 @@ lr .req x30 // link register
> */
> .macro ldr_this_cpu dst, sym, tmp
> adr_l \dst, \sym
> +alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
> mrs \tmp, tpidr_el1
> +alternative_else
> + mrs \tmp, tpidr_el2
> +alternative_endif
> ldr \dst, [\dst, \tmp]
> .endm
>
> diff --git a/arch/arm64/include/asm/percpu.h b/arch/arm64/include/asm/percpu.h
> index 3bd498e4de4c..43393208229e 100644
> --- a/arch/arm64/include/asm/percpu.h
> +++ b/arch/arm64/include/asm/percpu.h
> @@ -16,11 +16,15 @@
> #ifndef __ASM_PERCPU_H
> #define __ASM_PERCPU_H
>
> +#include <asm/alternative.h>
> #include <asm/stack_pointer.h>
>
> static inline void set_my_cpu_offset(unsigned long off)
> {
> - asm volatile("msr tpidr_el1, %0" :: "r" (off) : "memory");
> + asm volatile(ALTERNATIVE("msr tpidr_el1, %0",
> + "msr tpidr_el2, %0",
> + ARM64_HAS_VIRT_HOST_EXTN)
> + :: "r" (off) : "memory");
> }
>
> static inline unsigned long __my_cpu_offset(void)
> @@ -31,7 +35,10 @@ static inline unsigned long __my_cpu_offset(void)
> * We want to allow caching the value, so avoid using volatile and
> * instead use a fake stack read to hazard against barrier().
> */
> - asm("mrs %0, tpidr_el1" : "=r" (off) :
> + asm(ALTERNATIVE("mrs %0, tpidr_el1",
> + "mrs %0, tpidr_el2",
> + ARM64_HAS_VIRT_HOST_EXTN)
> + : "=r" (off) :
> "Q" (*(const unsigned long *)current_stack_pointer));
>
> return off;
> diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
> index 64c9e78f9882..6d4fdfccf869 100644
> --- a/arch/arm64/include/asm/processor.h
> +++ b/arch/arm64/include/asm/processor.h
> @@ -193,5 +193,6 @@ static inline void spin_lock_prefetch(const void *ptr)
>
> int cpu_enable_pan(void *__unused);
> int cpu_enable_cache_maint_trap(void *__unused);
> +int cpu_copy_el2regs(void *__unused);
>
> #endif /* __ASM_PROCESSOR_H */
> diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
> index 9f9e0064c8c1..1960706d2422 100644
> --- a/arch/arm64/kernel/cpufeature.c
> +++ b/arch/arm64/kernel/cpufeature.c
> @@ -864,6 +864,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
> .capability = ARM64_HAS_VIRT_HOST_EXTN,
> .def_scope = SCOPE_SYSTEM,
> .matches = runs_at_el2,
> + .enable = cpu_copy_el2regs,
> },
> {
> .desc = "32-bit EL0 Support",
> @@ -1295,3 +1296,25 @@ static int __init enable_mrs_emulation(void)
> }
>
> late_initcall(enable_mrs_emulation);
> +
> +int cpu_copy_el2regs(void *__unused)
> +{
> + int do_copyregs = 0;
> +
> + /*
> + * Copy register values that aren't redirected by hardware.
> + *
> + * Before code patching, we only set tpidr_el1, all CPUs need to copy
> + * this value to tpidr_el2 before we patch the code. Once we've done
> + * that, freshly-onlined CPUs will set tpidr_el2, so we don't need to
> + * do anything here.
> + */
> + asm volatile(ALTERNATIVE("mov %0, #1", "mov %0, #0",
> + ARM64_HAS_VIRT_HOST_EXTN)
> + : "=r" (do_copyregs) : : );
> +
> + if (do_copyregs)
> + write_sysreg(read_sysreg(tpidr_el1), tpidr_el2);
Could you just use has_vhe() here ?
> +
> + return 0;
> +}
> diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
> index 877d42fb0df6..109d43a15aaf 100644
> --- a/arch/arm64/mm/proc.S
> +++ b/arch/arm64/mm/proc.S
> @@ -70,7 +70,11 @@ ENTRY(cpu_do_suspend)
> mrs x8, mdscr_el1
> mrs x9, oslsr_el1
> mrs x10, sctlr_el1
> +alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
> mrs x11, tpidr_el1
> +alternative_else
> + mrs x11, tpidr_el2
> +alternative_endif
> mrs x12, sp_el0
> stp x2, x3, [x0]
> stp x4, xzr, [x0, #16]
> @@ -116,7 +120,11 @@ ENTRY(cpu_do_resume)
> msr mdscr_el1, x10
>
> msr sctlr_el1, x12
> +alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
> msr tpidr_el1, x13
> +alternative_else
> + msr tpidr_el2, x13
> +alternative_endif
> msr sp_el0, x14
> /*
> * Restore oslsr_el1 by writing oslar_el1
> --
> 2.13.3
>
Otherwise:
Reviewed-by: Christoffer Dall <cdall-QSEj5FYQhm4dnm+yROfE0A@public.gmane.org>
--
To unsubscribe from this list: send the line "unsubscribe devicetree" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply [flat|nested] 38+ messages in thread
* [PATCH v2 04/11] arm64: alternatives: use tpidr_el2 on VHE hosts
@ 2017-09-17 14:43 ` Christoffer Dall
0 siblings, 0 replies; 38+ messages in thread
From: Christoffer Dall @ 2017-09-17 14:43 UTC (permalink / raw)
To: linux-arm-kernel
On Tue, Aug 08, 2017 at 05:46:09PM +0100, James Morse wrote:
> Now that KVM uses tpidr_el2 in the same way as Linux's cpu_offset in
> tpidr_el1, merge the two. This saves KVM from save/restoring tpidr_el1
> on VHE hosts, and allows future code to blindly access per-cpu variables
> without triggering world-switch.
>
> Signed-off-by: James Morse <james.morse@arm.com>
> ---
> Changes since v1:
> * cpu_copy_el2regs()'s 'have I been patched' test now always sets a register,
> just in case the compiler puts do_copyregs on the stack.
I don't understand this?
>
> arch/arm64/include/asm/assembler.h | 8 ++++++++
> arch/arm64/include/asm/percpu.h | 11 +++++++++--
> arch/arm64/include/asm/processor.h | 1 +
> arch/arm64/kernel/cpufeature.c | 23 +++++++++++++++++++++++
> arch/arm64/mm/proc.S | 8 ++++++++
> 5 files changed, 49 insertions(+), 2 deletions(-)
>
> diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
> index 1b67c3782d00..06252602901e 100644
> --- a/arch/arm64/include/asm/assembler.h
> +++ b/arch/arm64/include/asm/assembler.h
> @@ -236,7 +236,11 @@ lr .req x30 // link register
> */
> .macro adr_this_cpu, dst, sym, tmp
> adr_l \dst, \sym
> +alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
> mrs \tmp, tpidr_el1
> +alternative_else
> + mrs \tmp, tpidr_el2
> +alternative_endif
> add \dst, \dst, \tmp
> .endm
>
> @@ -247,7 +251,11 @@ lr .req x30 // link register
> */
> .macro ldr_this_cpu dst, sym, tmp
> adr_l \dst, \sym
> +alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
> mrs \tmp, tpidr_el1
> +alternative_else
> + mrs \tmp, tpidr_el2
> +alternative_endif
> ldr \dst, [\dst, \tmp]
> .endm
>
> diff --git a/arch/arm64/include/asm/percpu.h b/arch/arm64/include/asm/percpu.h
> index 3bd498e4de4c..43393208229e 100644
> --- a/arch/arm64/include/asm/percpu.h
> +++ b/arch/arm64/include/asm/percpu.h
> @@ -16,11 +16,15 @@
> #ifndef __ASM_PERCPU_H
> #define __ASM_PERCPU_H
>
> +#include <asm/alternative.h>
> #include <asm/stack_pointer.h>
>
> static inline void set_my_cpu_offset(unsigned long off)
> {
> - asm volatile("msr tpidr_el1, %0" :: "r" (off) : "memory");
> + asm volatile(ALTERNATIVE("msr tpidr_el1, %0",
> + "msr tpidr_el2, %0",
> + ARM64_HAS_VIRT_HOST_EXTN)
> + :: "r" (off) : "memory");
> }
>
> static inline unsigned long __my_cpu_offset(void)
> @@ -31,7 +35,10 @@ static inline unsigned long __my_cpu_offset(void)
> * We want to allow caching the value, so avoid using volatile and
> * instead use a fake stack read to hazard against barrier().
> */
> - asm("mrs %0, tpidr_el1" : "=r" (off) :
> + asm(ALTERNATIVE("mrs %0, tpidr_el1",
> + "mrs %0, tpidr_el2",
> + ARM64_HAS_VIRT_HOST_EXTN)
> + : "=r" (off) :
> "Q" (*(const unsigned long *)current_stack_pointer));
>
> return off;
> diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
> index 64c9e78f9882..6d4fdfccf869 100644
> --- a/arch/arm64/include/asm/processor.h
> +++ b/arch/arm64/include/asm/processor.h
> @@ -193,5 +193,6 @@ static inline void spin_lock_prefetch(const void *ptr)
>
> int cpu_enable_pan(void *__unused);
> int cpu_enable_cache_maint_trap(void *__unused);
> +int cpu_copy_el2regs(void *__unused);
>
> #endif /* __ASM_PROCESSOR_H */
> diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
> index 9f9e0064c8c1..1960706d2422 100644
> --- a/arch/arm64/kernel/cpufeature.c
> +++ b/arch/arm64/kernel/cpufeature.c
> @@ -864,6 +864,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
> .capability = ARM64_HAS_VIRT_HOST_EXTN,
> .def_scope = SCOPE_SYSTEM,
> .matches = runs_at_el2,
> + .enable = cpu_copy_el2regs,
> },
> {
> .desc = "32-bit EL0 Support",
> @@ -1295,3 +1296,25 @@ static int __init enable_mrs_emulation(void)
> }
>
> late_initcall(enable_mrs_emulation);
> +
> +int cpu_copy_el2regs(void *__unused)
> +{
> + int do_copyregs = 0;
> +
> + /*
> + * Copy register values that aren't redirected by hardware.
> + *
> + * Before code patching, we only set tpidr_el1, all CPUs need to copy
> + * this value to tpidr_el2 before we patch the code. Once we've done
> + * that, freshly-onlined CPUs will set tpidr_el2, so we don't need to
> + * do anything here.
> + */
> + asm volatile(ALTERNATIVE("mov %0, #1", "mov %0, #0",
> + ARM64_HAS_VIRT_HOST_EXTN)
> + : "=r" (do_copyregs) : : );
> +
> + if (do_copyregs)
> + write_sysreg(read_sysreg(tpidr_el1), tpidr_el2);
Could you just use has_vhe() here ?
> +
> + return 0;
> +}
> diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
> index 877d42fb0df6..109d43a15aaf 100644
> --- a/arch/arm64/mm/proc.S
> +++ b/arch/arm64/mm/proc.S
> @@ -70,7 +70,11 @@ ENTRY(cpu_do_suspend)
> mrs x8, mdscr_el1
> mrs x9, oslsr_el1
> mrs x10, sctlr_el1
> +alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
> mrs x11, tpidr_el1
> +alternative_else
> + mrs x11, tpidr_el2
> +alternative_endif
> mrs x12, sp_el0
> stp x2, x3, [x0]
> stp x4, xzr, [x0, #16]
> @@ -116,7 +120,11 @@ ENTRY(cpu_do_resume)
> msr mdscr_el1, x10
>
> msr sctlr_el1, x12
> +alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
> msr tpidr_el1, x13
> +alternative_else
> + msr tpidr_el2, x13
> +alternative_endif
> msr sp_el0, x14
> /*
> * Restore oslsr_el1 by writing oslar_el1
> --
> 2.13.3
>
Otherwise:
Reviewed-by: Christoffer Dall <cdall@linaro.org>
^ permalink raw reply [flat|nested] 38+ messages in thread
* Re: [PATCH v2 04/11] arm64: alternatives: use tpidr_el2 on VHE hosts
2017-09-17 14:43 ` Christoffer Dall
@ 2017-09-19 9:55 ` James Morse
-1 siblings, 0 replies; 38+ messages in thread
From: James Morse @ 2017-09-19 9:55 UTC (permalink / raw)
To: Christoffer Dall
Cc: linux-arm-kernel-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r,
kvmarm-FPEHb7Xf0XXUo1n7N8X6UoWGPAHP3yOg,
devicetree-u79uwXL29TY76Z2rM5mHXA, Will Deacon, Catalin Marinas,
Marc Zyngier, Christoffer Dall, Mark Rutland, Rob Herring,
Loc Ho
Hi Christoffer,
On 17/09/17 15:43, Christoffer Dall wrote:
> On Tue, Aug 08, 2017 at 05:46:09PM +0100, James Morse wrote:
>> Now that KVM uses tpidr_el2 in the same way as Linux's cpu_offset in
>> tpidr_el1, merge the two. This saves KVM from save/restoring tpidr_el1
>> on VHE hosts, and allows future code to blindly access per-cpu variables
>> without triggering world-switch.
>> Changes since v1:
>> * cpu_copy_el2regs()'s 'have I been patched' test now always sets a register,
>> just in case the compiler puts do_copyregs on the stack.
> I don't understand this?
Looks like this was a note to myself, translation below:
>> diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
>> index 9f9e0064c8c1..1960706d2422 100644
>> --- a/arch/arm64/kernel/cpufeature.c
>> +++ b/arch/arm64/kernel/cpufeature.c
>> @@ -1295,3 +1296,25 @@ static int __init enable_mrs_emulation(void)
>> }
>>
>> late_initcall(enable_mrs_emulation);
>> +
>> +int cpu_copy_el2regs(void *__unused)
>> +{
>> + int do_copyregs = 0;
>> +
>> + /*
>> + * Copy register values that aren't redirected by hardware.
>> + *
>> + * Before code patching, we only set tpidr_el1, all CPUs need to copy
>> + * this value to tpidr_el2 before we patch the code. Once we've done
>> + * that, freshly-onlined CPUs will set tpidr_el2, so we don't need to
>> + * do anything here.
>> + */
>> + asm volatile(ALTERNATIVE("mov %0, #1", "mov %0, #0",
>> + ARM64_HAS_VIRT_HOST_EXTN)
>> + : "=r" (do_copyregs) : : );
>> +
>> + if (do_copyregs)
>> + write_sysreg(read_sysreg(tpidr_el1), tpidr_el2);
>
> Could you just use has_vhe() here ?
This is the 'have I been patched' test.
Before patching the my_cpu_offset value will be in tpidr_el1, we copy it to
tpidr_el2.
Once smp_cpus_done() calls apply_alternatives_all(), set_my_cpu_offset() is
patched to write its value into tpidr_el2. CPUs that come online after this
point don't need the value copying, but this cpufeature enable call will still
be called.
The skimmed-over information is that cpufeature's enable methods are called
locally when any core comes online, even after the alternatives have been
applied. (we also modify pstate and set sctlr bits from these).
Would it be clearer if I rewrote the comment as:
> cpufeature's enable methods are called whenever a CPU comes online, but we
> only need to copy tpidr_el1 -> tpidr_el2 on CPUs that were online before the
> alternatives are applied. After that point a VHE system will only use
> tpidr_el2.
The change-since-v1 was paranoia as 'do_copyregs' wasn't always updated: I was
worried about the compiler ditching the '=0' initialisation as it's written by
the asm() before being read... but not once we've patched in the alternative.
Having to put 'do_copyregs' on the stack, but then deciding to just allocate it
late was where I thought this might happen. Short-version: I don't trust the
compiler.
>> +
>> + return 0;
>> +}
> Otherwise:
>
> Reviewed-by: Christoffer Dall <cdall-QSEj5FYQhm4dnm+yROfE0A@public.gmane.org>
Thanks!
James
--
To unsubscribe from this list: send the line "unsubscribe devicetree" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply [flat|nested] 38+ messages in thread
* [PATCH v2 04/11] arm64: alternatives: use tpidr_el2 on VHE hosts
@ 2017-09-19 9:55 ` James Morse
0 siblings, 0 replies; 38+ messages in thread
From: James Morse @ 2017-09-19 9:55 UTC (permalink / raw)
To: linux-arm-kernel
Hi Christoffer,
On 17/09/17 15:43, Christoffer Dall wrote:
> On Tue, Aug 08, 2017 at 05:46:09PM +0100, James Morse wrote:
>> Now that KVM uses tpidr_el2 in the same way as Linux's cpu_offset in
>> tpidr_el1, merge the two. This saves KVM from save/restoring tpidr_el1
>> on VHE hosts, and allows future code to blindly access per-cpu variables
>> without triggering world-switch.
>> Changes since v1:
>> * cpu_copy_el2regs()'s 'have I been patched' test now always sets a register,
>> just in case the compiler puts do_copyregs on the stack.
> I don't understand this?
Looks like this was a note to myself, translation below:
>> diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
>> index 9f9e0064c8c1..1960706d2422 100644
>> --- a/arch/arm64/kernel/cpufeature.c
>> +++ b/arch/arm64/kernel/cpufeature.c
>> @@ -1295,3 +1296,25 @@ static int __init enable_mrs_emulation(void)
>> }
>>
>> late_initcall(enable_mrs_emulation);
>> +
>> +int cpu_copy_el2regs(void *__unused)
>> +{
>> + int do_copyregs = 0;
>> +
>> + /*
>> + * Copy register values that aren't redirected by hardware.
>> + *
>> + * Before code patching, we only set tpidr_el1, all CPUs need to copy
>> + * this value to tpidr_el2 before we patch the code. Once we've done
>> + * that, freshly-onlined CPUs will set tpidr_el2, so we don't need to
>> + * do anything here.
>> + */
>> + asm volatile(ALTERNATIVE("mov %0, #1", "mov %0, #0",
>> + ARM64_HAS_VIRT_HOST_EXTN)
>> + : "=r" (do_copyregs) : : );
>> +
>> + if (do_copyregs)
>> + write_sysreg(read_sysreg(tpidr_el1), tpidr_el2);
>
> Could you just use has_vhe() here ?
This is the 'have I been patched' test.
Before patching, the my_cpu_offset value will be in tpidr_el1; we copy it to
tpidr_el2.
Once smp_cpus_done() calls apply_alternatives_all(), set_my_cpu_offset() is
patched to write its value into tpidr_el2. CPUs that come online after this
point don't need the value copying, but this cpufeature enable call will still
be called.
The skimmed-over information is that cpufeature's enable methods are called
locally when any core comes online, even after the alternatives have been
applied. (we also modify pstate and set sctlr bits from these).
Would it be clearer if I rewrote the comment as:
> cpufeature's enable methods are called whenever a CPU comes online, but we
> only need to copy tpidr_el1 -> tpidr_el2 on CPUs that were online before the
> alternatives are applied. After that point a VHE system will only use
> tpidr_el2.
The change-since-v1 was paranoia as 'do_copyregs' wasn't always updated: I was
worried about the compiler ditching the '=0' initialisation as it's written by
the asm() before being read... but not once we've patched in the alternative.
Having to put 'do_copyregs' on the stack, but then deciding to just allocate it
late was where I thought this might happen. Short-version: I don't trust the
compiler.
>> +
>> + return 0;
>> +}
> Otherwise:
>
> Reviewed-by: Christoffer Dall <cdall@linaro.org>
Thanks!
James
^ permalink raw reply [flat|nested] 38+ messages in thread
* [PATCH v2 05/11] KVM: arm64: Stop save/restoring host tpidr_el1 on VHE
2017-08-08 16:46 ` James Morse
@ 2017-08-08 16:46 ` James Morse
-1 siblings, 0 replies; 38+ messages in thread
From: James Morse @ 2017-08-08 16:46 UTC (permalink / raw)
To: linux-arm-kernel-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r
Cc: kvmarm-FPEHb7Xf0XXUo1n7N8X6UoWGPAHP3yOg,
devicetree-u79uwXL29TY76Z2rM5mHXA, Will Deacon, Catalin Marinas,
Marc Zyngier, Christoffer Dall, Mark Rutland, Rob Herring,
Loc Ho, James Morse
Now that a VHE host uses tpidr_el2 for the cpu offset we no longer
need KVM to save/restore tpidr_el1. Move this from the 'common' code
into the non-vhe code. While we're at it, on VHE we don't need to
save the ELR or SPSR as kernel_entry in entry.S will have pushed these
onto the kernel stack, and will restore them from there. Move these
to the non-vhe code as we need them to get back to the host.
Finally remove the always-copy-tpidr we hid in the stage2 setup
code, cpufeature's enable callback will do this for VHE, we only
need KVM to do it for non-vhe. Add the copy into kvm-init instead.
Signed-off-by: James Morse <james.morse-5wv7dgnIgG8@public.gmane.org>
Reviewed-by: Christoffer Dall <cdall-QSEj5FYQhm4dnm+yROfE0A@public.gmane.org>
---
Changes since v1:
* Switched KVM<->arm64 in the subject.
arch/arm64/kvm/hyp-init.S | 4 ++++
arch/arm64/kvm/hyp/s2-setup.c | 3 ---
arch/arm64/kvm/hyp/sysreg-sr.c | 16 ++++++++--------
3 files changed, 12 insertions(+), 11 deletions(-)
diff --git a/arch/arm64/kvm/hyp-init.S b/arch/arm64/kvm/hyp-init.S
index 3f9615582377..fbf259893f6a 100644
--- a/arch/arm64/kvm/hyp-init.S
+++ b/arch/arm64/kvm/hyp-init.S
@@ -122,6 +122,10 @@ CPU_BE( orr x4, x4, #SCTLR_ELx_EE)
kern_hyp_va x2
msr vbar_el2, x2
+ /* copy tpidr_el1 into tpidr_el2 for use by HYP */
+ mrs x1, tpidr_el1
+ msr tpidr_el2, x1
+
/* Hello, World! */
eret
ENDPROC(__kvm_hyp_init)
diff --git a/arch/arm64/kvm/hyp/s2-setup.c b/arch/arm64/kvm/hyp/s2-setup.c
index eb401dbb285e..b81f4091c909 100644
--- a/arch/arm64/kvm/hyp/s2-setup.c
+++ b/arch/arm64/kvm/hyp/s2-setup.c
@@ -84,8 +84,5 @@ u32 __hyp_text __init_stage2_translation(void)
write_sysreg(val, vtcr_el2);
- /* copy tpidr_el1 into tpidr_el2 for use by HYP */
- write_sysreg(read_sysreg(tpidr_el1), tpidr_el2);
-
return parange;
}
diff --git a/arch/arm64/kvm/hyp/sysreg-sr.c b/arch/arm64/kvm/hyp/sysreg-sr.c
index 934137647837..c54cc2afb92b 100644
--- a/arch/arm64/kvm/hyp/sysreg-sr.c
+++ b/arch/arm64/kvm/hyp/sysreg-sr.c
@@ -27,8 +27,8 @@ static void __hyp_text __sysreg_do_nothing(struct kvm_cpu_context *ctxt) { }
/*
* Non-VHE: Both host and guest must save everything.
*
- * VHE: Host must save tpidr*_el[01], actlr_el1, mdscr_el1, sp0, pc,
- * pstate, and guest must save everything.
+ * VHE: Host must save tpidr*_el0, actlr_el1, mdscr_el1, sp_el0,
+ * and guest must save everything.
*/
static void __hyp_text __sysreg_save_common_state(struct kvm_cpu_context *ctxt)
@@ -36,11 +36,8 @@ static void __hyp_text __sysreg_save_common_state(struct kvm_cpu_context *ctxt)
ctxt->sys_regs[ACTLR_EL1] = read_sysreg(actlr_el1);
ctxt->sys_regs[TPIDR_EL0] = read_sysreg(tpidr_el0);
ctxt->sys_regs[TPIDRRO_EL0] = read_sysreg(tpidrro_el0);
- ctxt->sys_regs[TPIDR_EL1] = read_sysreg(tpidr_el1);
ctxt->sys_regs[MDSCR_EL1] = read_sysreg(mdscr_el1);
ctxt->gp_regs.regs.sp = read_sysreg(sp_el0);
- ctxt->gp_regs.regs.pc = read_sysreg_el2(elr);
- ctxt->gp_regs.regs.pstate = read_sysreg_el2(spsr);
}
static void __hyp_text __sysreg_save_state(struct kvm_cpu_context *ctxt)
@@ -62,10 +59,13 @@ static void __hyp_text __sysreg_save_state(struct kvm_cpu_context *ctxt)
ctxt->sys_regs[AMAIR_EL1] = read_sysreg_el1(amair);
ctxt->sys_regs[CNTKCTL_EL1] = read_sysreg_el1(cntkctl);
ctxt->sys_regs[PAR_EL1] = read_sysreg(par_el1);
+ ctxt->sys_regs[TPIDR_EL1] = read_sysreg(tpidr_el1);
ctxt->gp_regs.sp_el1 = read_sysreg(sp_el1);
ctxt->gp_regs.elr_el1 = read_sysreg_el1(elr);
ctxt->gp_regs.spsr[KVM_SPSR_EL1]= read_sysreg_el1(spsr);
+ ctxt->gp_regs.regs.pc = read_sysreg_el2(elr);
+ ctxt->gp_regs.regs.pstate = read_sysreg_el2(spsr);
}
static hyp_alternate_select(__sysreg_call_save_host_state,
@@ -89,11 +89,8 @@ static void __hyp_text __sysreg_restore_common_state(struct kvm_cpu_context *ctx
write_sysreg(ctxt->sys_regs[ACTLR_EL1], actlr_el1);
write_sysreg(ctxt->sys_regs[TPIDR_EL0], tpidr_el0);
write_sysreg(ctxt->sys_regs[TPIDRRO_EL0], tpidrro_el0);
- write_sysreg(ctxt->sys_regs[TPIDR_EL1], tpidr_el1);
write_sysreg(ctxt->sys_regs[MDSCR_EL1], mdscr_el1);
write_sysreg(ctxt->gp_regs.regs.sp, sp_el0);
- write_sysreg_el2(ctxt->gp_regs.regs.pc, elr);
- write_sysreg_el2(ctxt->gp_regs.regs.pstate, spsr);
}
static void __hyp_text __sysreg_restore_state(struct kvm_cpu_context *ctxt)
@@ -115,10 +112,13 @@ static void __hyp_text __sysreg_restore_state(struct kvm_cpu_context *ctxt)
write_sysreg_el1(ctxt->sys_regs[AMAIR_EL1], amair);
write_sysreg_el1(ctxt->sys_regs[CNTKCTL_EL1], cntkctl);
write_sysreg(ctxt->sys_regs[PAR_EL1], par_el1);
+ write_sysreg(ctxt->sys_regs[TPIDR_EL1], tpidr_el1);
write_sysreg(ctxt->gp_regs.sp_el1, sp_el1);
write_sysreg_el1(ctxt->gp_regs.elr_el1, elr);
write_sysreg_el1(ctxt->gp_regs.spsr[KVM_SPSR_EL1],spsr);
+ write_sysreg_el2(ctxt->gp_regs.regs.pc, elr);
+ write_sysreg_el2(ctxt->gp_regs.regs.pstate, spsr);
}
static hyp_alternate_select(__sysreg_call_restore_host_state,
--
2.13.3
--
To unsubscribe from this list: send the line "unsubscribe devicetree" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply related [flat|nested] 38+ messages in thread
* [PATCH v2 05/11] KVM: arm64: Stop save/restoring host tpidr_el1 on VHE
@ 2017-08-08 16:46 ` James Morse
0 siblings, 0 replies; 38+ messages in thread
From: James Morse @ 2017-08-08 16:46 UTC (permalink / raw)
To: linux-arm-kernel
Now that a VHE host uses tpidr_el2 for the cpu offset we no longer
need KVM to save/restore tpidr_el1. Move this from the 'common' code
into the non-vhe code. While we're at it, on VHE we don't need to
save the ELR or SPSR as kernel_entry in entry.S will have pushed these
onto the kernel stack, and will restore them from there. Move these
to the non-vhe code as we need them to get back to the host.
Finally remove the always-copy-tpidr we hid in the stage2 setup
code, cpufeature's enable callback will do this for VHE, we only
need KVM to do it for non-vhe. Add the copy into kvm-init instead.
Signed-off-by: James Morse <james.morse@arm.com>
Reviewed-by: Christoffer Dall <cdall@linaro.org>
---
Changes since v1:
* Switched KVM<->arm64 in the subject.
arch/arm64/kvm/hyp-init.S | 4 ++++
arch/arm64/kvm/hyp/s2-setup.c | 3 ---
arch/arm64/kvm/hyp/sysreg-sr.c | 16 ++++++++--------
3 files changed, 12 insertions(+), 11 deletions(-)
diff --git a/arch/arm64/kvm/hyp-init.S b/arch/arm64/kvm/hyp-init.S
index 3f9615582377..fbf259893f6a 100644
--- a/arch/arm64/kvm/hyp-init.S
+++ b/arch/arm64/kvm/hyp-init.S
@@ -122,6 +122,10 @@ CPU_BE( orr x4, x4, #SCTLR_ELx_EE)
kern_hyp_va x2
msr vbar_el2, x2
+ /* copy tpidr_el1 into tpidr_el2 for use by HYP */
+ mrs x1, tpidr_el1
+ msr tpidr_el2, x1
+
/* Hello, World! */
eret
ENDPROC(__kvm_hyp_init)
diff --git a/arch/arm64/kvm/hyp/s2-setup.c b/arch/arm64/kvm/hyp/s2-setup.c
index eb401dbb285e..b81f4091c909 100644
--- a/arch/arm64/kvm/hyp/s2-setup.c
+++ b/arch/arm64/kvm/hyp/s2-setup.c
@@ -84,8 +84,5 @@ u32 __hyp_text __init_stage2_translation(void)
write_sysreg(val, vtcr_el2);
- /* copy tpidr_el1 into tpidr_el2 for use by HYP */
- write_sysreg(read_sysreg(tpidr_el1), tpidr_el2);
-
return parange;
}
diff --git a/arch/arm64/kvm/hyp/sysreg-sr.c b/arch/arm64/kvm/hyp/sysreg-sr.c
index 934137647837..c54cc2afb92b 100644
--- a/arch/arm64/kvm/hyp/sysreg-sr.c
+++ b/arch/arm64/kvm/hyp/sysreg-sr.c
@@ -27,8 +27,8 @@ static void __hyp_text __sysreg_do_nothing(struct kvm_cpu_context *ctxt) { }
/*
* Non-VHE: Both host and guest must save everything.
*
- * VHE: Host must save tpidr*_el[01], actlr_el1, mdscr_el1, sp0, pc,
- * pstate, and guest must save everything.
+ * VHE: Host must save tpidr*_el0, actlr_el1, mdscr_el1, sp_el0,
+ * and guest must save everything.
*/
static void __hyp_text __sysreg_save_common_state(struct kvm_cpu_context *ctxt)
@@ -36,11 +36,8 @@ static void __hyp_text __sysreg_save_common_state(struct kvm_cpu_context *ctxt)
ctxt->sys_regs[ACTLR_EL1] = read_sysreg(actlr_el1);
ctxt->sys_regs[TPIDR_EL0] = read_sysreg(tpidr_el0);
ctxt->sys_regs[TPIDRRO_EL0] = read_sysreg(tpidrro_el0);
- ctxt->sys_regs[TPIDR_EL1] = read_sysreg(tpidr_el1);
ctxt->sys_regs[MDSCR_EL1] = read_sysreg(mdscr_el1);
ctxt->gp_regs.regs.sp = read_sysreg(sp_el0);
- ctxt->gp_regs.regs.pc = read_sysreg_el2(elr);
- ctxt->gp_regs.regs.pstate = read_sysreg_el2(spsr);
}
static void __hyp_text __sysreg_save_state(struct kvm_cpu_context *ctxt)
@@ -62,10 +59,13 @@ static void __hyp_text __sysreg_save_state(struct kvm_cpu_context *ctxt)
ctxt->sys_regs[AMAIR_EL1] = read_sysreg_el1(amair);
ctxt->sys_regs[CNTKCTL_EL1] = read_sysreg_el1(cntkctl);
ctxt->sys_regs[PAR_EL1] = read_sysreg(par_el1);
+ ctxt->sys_regs[TPIDR_EL1] = read_sysreg(tpidr_el1);
ctxt->gp_regs.sp_el1 = read_sysreg(sp_el1);
ctxt->gp_regs.elr_el1 = read_sysreg_el1(elr);
ctxt->gp_regs.spsr[KVM_SPSR_EL1]= read_sysreg_el1(spsr);
+ ctxt->gp_regs.regs.pc = read_sysreg_el2(elr);
+ ctxt->gp_regs.regs.pstate = read_sysreg_el2(spsr);
}
static hyp_alternate_select(__sysreg_call_save_host_state,
@@ -89,11 +89,8 @@ static void __hyp_text __sysreg_restore_common_state(struct kvm_cpu_context *ctx
write_sysreg(ctxt->sys_regs[ACTLR_EL1], actlr_el1);
write_sysreg(ctxt->sys_regs[TPIDR_EL0], tpidr_el0);
write_sysreg(ctxt->sys_regs[TPIDRRO_EL0], tpidrro_el0);
- write_sysreg(ctxt->sys_regs[TPIDR_EL1], tpidr_el1);
write_sysreg(ctxt->sys_regs[MDSCR_EL1], mdscr_el1);
write_sysreg(ctxt->gp_regs.regs.sp, sp_el0);
- write_sysreg_el2(ctxt->gp_regs.regs.pc, elr);
- write_sysreg_el2(ctxt->gp_regs.regs.pstate, spsr);
}
static void __hyp_text __sysreg_restore_state(struct kvm_cpu_context *ctxt)
@@ -115,10 +112,13 @@ static void __hyp_text __sysreg_restore_state(struct kvm_cpu_context *ctxt)
write_sysreg_el1(ctxt->sys_regs[AMAIR_EL1], amair);
write_sysreg_el1(ctxt->sys_regs[CNTKCTL_EL1], cntkctl);
write_sysreg(ctxt->sys_regs[PAR_EL1], par_el1);
+ write_sysreg(ctxt->sys_regs[TPIDR_EL1], tpidr_el1);
write_sysreg(ctxt->gp_regs.sp_el1, sp_el1);
write_sysreg_el1(ctxt->gp_regs.elr_el1, elr);
write_sysreg_el1(ctxt->gp_regs.spsr[KVM_SPSR_EL1],spsr);
+ write_sysreg_el2(ctxt->gp_regs.regs.pc, elr);
+ write_sysreg_el2(ctxt->gp_regs.regs.pstate, spsr);
}
static hyp_alternate_select(__sysreg_call_restore_host_state,
--
2.13.3
^ permalink raw reply related [flat|nested] 38+ messages in thread
* [PATCH v2 06/11] Docs: dt: add devicetree binding for describing arm64 SDEI firmware
2017-08-08 16:46 ` James Morse
@ 2017-08-08 16:46 ` James Morse
-1 siblings, 0 replies; 38+ messages in thread
From: James Morse @ 2017-08-08 16:46 UTC (permalink / raw)
To: linux-arm-kernel-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r
Cc: kvmarm-FPEHb7Xf0XXUo1n7N8X6UoWGPAHP3yOg,
devicetree-u79uwXL29TY76Z2rM5mHXA, Will Deacon, Catalin Marinas,
Marc Zyngier, Christoffer Dall, Mark Rutland, Rob Herring,
Loc Ho, James Morse
The Software Delegated Exception Interface (SDEI) is an ARM standard
for registering callbacks from the platform firmware into the OS.
This is typically used to implement RAS notifications, or from an
IRQ that has been promoted to a firmware-assisted NMI.
Add a new devicetree binding to describe the SDE firmware interface.
Signed-off-by: James Morse <james.morse-5wv7dgnIgG8@public.gmane.org>
---
Changes since v1:
* Added bound IRQ description for binding,
* Reference SMC-CC, not 'AAPCS like'
* Move sdei node under firmware node (and the file path)
.../devicetree/bindings/arm/firmware/sdei.txt | 42 ++++++++++++++++++++++
1 file changed, 42 insertions(+)
create mode 100644 Documentation/devicetree/bindings/arm/firmware/sdei.txt
diff --git a/Documentation/devicetree/bindings/arm/firmware/sdei.txt b/Documentation/devicetree/bindings/arm/firmware/sdei.txt
new file mode 100644
index 000000000000..124019acd8ff
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/firmware/sdei.txt
@@ -0,0 +1,42 @@
+* Software Delegated Exception Interface (SDEI)
+
+Firmware implementing the SDEI functions described in ARM document number
+ARM DEN 0054A ("Software Delegated Exception Interface") can be used by
+Linux to receive notification of events such as those generated by
+firmware-first error handling, or from an IRQ that has been promoted to
+a firmware-assisted NMI.
+
+The interface provides a number of API functions for registering callbacks
+and enabling/disabling events. Functions are invoked by trapping to the
+privilege level of the SDEI firmware (specified as part of the binding
+below) and passing arguments in a manner specified by the "SMC Calling
+Convention" (ARM DEN 0028B):
+
+ r0 => 32-bit Function ID / return value
+ {r1 - r3} => Parameters
+
+Note that the immediate field of the trapping instruction must be set
+to #0.
+
+The SDEI_EVENT_REGISTER function registers a callback in the kernel
+text to handle the specified event number.
+
+The sdei node should be a child node of '/firmware' and have required
+properties:
+
+ - compatible : should contain:
+ * "arm,sdei-1.0" : For implementations complying to SDEI version 1.x.
+
+ - method : The method of calling the SDEI firmware. Permitted
+ values are:
+ * "smc" : SMC #0, with the register assignments specified in this
+ binding.
+ * "hvc" : HVC #0, with the register assignments specified in this
+ binding.
+Example:
+ firmware {
+ sdei {
+ compatible = "arm,sdei-1.0";
+ method = "smc";
+ };
+ };
--
2.13.3
--
To unsubscribe from this list: send the line "unsubscribe devicetree" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply related [flat|nested] 38+ messages in thread
* [PATCH v2 06/11] Docs: dt: add devicetree binding for describing arm64 SDEI firmware
@ 2017-08-08 16:46 ` James Morse
0 siblings, 0 replies; 38+ messages in thread
From: James Morse @ 2017-08-08 16:46 UTC (permalink / raw)
To: linux-arm-kernel
The Software Delegated Exception Interface (SDEI) is an ARM standard
for registering callbacks from the platform firmware into the OS.
This is typically used to implement RAS notifications, or from an
IRQ that has been promoted to a firmware-assisted NMI.
Add a new devicetree binding to describe the SDE firmware interface.
Signed-off-by: James Morse <james.morse@arm.com>
---
Changes since v1:
* Added bound IRQ description for binding,
* Reference SMC-CC, not 'AAPCS like'
* Move sdei node under firmware node (and the file path)
.../devicetree/bindings/arm/firmware/sdei.txt | 42 ++++++++++++++++++++++
1 file changed, 42 insertions(+)
create mode 100644 Documentation/devicetree/bindings/arm/firmware/sdei.txt
diff --git a/Documentation/devicetree/bindings/arm/firmware/sdei.txt b/Documentation/devicetree/bindings/arm/firmware/sdei.txt
new file mode 100644
index 000000000000..124019acd8ff
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/firmware/sdei.txt
@@ -0,0 +1,42 @@
+* Software Delegated Exception Interface (SDEI)
+
+Firmware implementing the SDEI functions described in ARM document number
+ARM DEN 0054A ("Software Delegated Exception Interface") can be used by
+Linux to receive notification of events such as those generated by
+firmware-first error handling, or from an IRQ that has been promoted to
+a firmware-assisted NMI.
+
+The interface provides a number of API functions for registering callbacks
+and enabling/disabling events. Functions are invoked by trapping to the
+privilege level of the SDEI firmware (specified as part of the binding
+below) and passing arguments in a manner specified by the "SMC Calling
+Convention" (ARM DEN 0028B):
+
+ r0 => 32-bit Function ID / return value
+ {r1 - r3} => Parameters
+
+Note that the immediate field of the trapping instruction must be set
+to #0.
+
+The SDEI_EVENT_REGISTER function registers a callback in the kernel
+text to handle the specified event number.
+
+The sdei node should be a child node of '/firmware' and have required
+properties:
+
+ - compatible : should contain:
+ * "arm,sdei-1.0" : For implementations complying to SDEI version 1.x.
+
+ - method : The method of calling the SDEI firmware. Permitted
+ values are:
+ * "smc" : SMC #0, with the register assignments specified in this
+ binding.
+ * "hvc" : HVC #0, with the register assignments specified in this
+ binding.
+Example:
+ firmware {
+ sdei {
+ compatible = "arm,sdei-1.0";
+ method = "smc";
+ };
+ };
--
2.13.3
^ permalink raw reply related [flat|nested] 38+ messages in thread
[parent not found: <20170808164616.25949-7-james.morse-5wv7dgnIgG8@public.gmane.org>]
* Re: [PATCH v2 06/11] Docs: dt: add devicetree binding for describing arm64 SDEI firmware
2017-08-08 16:46 ` James Morse
@ 2017-08-17 15:09 ` Rob Herring
-1 siblings, 0 replies; 38+ messages in thread
From: Rob Herring @ 2017-08-17 15:09 UTC (permalink / raw)
To: James Morse
Cc: linux-arm-kernel-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r,
kvmarm-FPEHb7Xf0XXUo1n7N8X6UoWGPAHP3yOg,
devicetree-u79uwXL29TY76Z2rM5mHXA, Will Deacon, Catalin Marinas,
Marc Zyngier, Christoffer Dall, Mark Rutland, Loc Ho
On Tue, Aug 08, 2017 at 05:46:11PM +0100, James Morse wrote:
> The Software Delegated Exception Interface (SDEI) is an ARM standard
> for registering callbacks from the platform firmware into the OS.
> This is typically used to implement RAS notifications, or from an
> IRQ that has been promoted to a firmware-assisted NMI.
>
> Add a new devicetree binding to describe the SDE firmware interface.
>
> Signed-off-by: James Morse <james.morse-5wv7dgnIgG8@public.gmane.org>
> ---
> Changes since v1:
> * Added bound IRQ description for binding,
> * Reference SMC-CC, not 'AAPCS like'
> * Move sdei node under firmware node (and the file path)
>
> .../devicetree/bindings/arm/firmware/sdei.txt | 42 ++++++++++++++++++++++
> 1 file changed, 42 insertions(+)
> create mode 100644 Documentation/devicetree/bindings/arm/firmware/sdei.txt
Acked-by: Rob Herring <robh-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
--
To unsubscribe from this list: send the line "unsubscribe devicetree" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply [flat|nested] 38+ messages in thread
* [PATCH v2 06/11] Docs: dt: add devicetree binding for describing arm64 SDEI firmware
@ 2017-08-17 15:09 ` Rob Herring
0 siblings, 0 replies; 38+ messages in thread
From: Rob Herring @ 2017-08-17 15:09 UTC (permalink / raw)
To: linux-arm-kernel
On Tue, Aug 08, 2017 at 05:46:11PM +0100, James Morse wrote:
> The Software Delegated Exception Interface (SDEI) is an ARM standard
> for registering callbacks from the platform firmware into the OS.
> This is typically used to implement RAS notifications, or from an
> IRQ that has been promoted to a firmware-assisted NMI.
>
> Add a new devicetree binding to describe the SDE firmware interface.
>
> Signed-off-by: James Morse <james.morse@arm.com>
> ---
> Changes since v1:
> * Added bound IRQ description for binding,
> * Reference SMC-CC, not 'AAPCS like'
> * Move sdei node under firmware node (and the file path)
>
> .../devicetree/bindings/arm/firmware/sdei.txt | 42 ++++++++++++++++++++++
> 1 file changed, 42 insertions(+)
> create mode 100644 Documentation/devicetree/bindings/arm/firmware/sdei.txt
Acked-by: Rob Herring <robh@kernel.org>
^ permalink raw reply [flat|nested] 38+ messages in thread
* [PATCH v2 08/11] arm64: kernel: Add arch-specific SDEI entry code and CPU masking
2017-08-08 16:46 ` James Morse
@ 2017-08-08 16:46 ` James Morse
-1 siblings, 0 replies; 38+ messages in thread
From: James Morse @ 2017-08-08 16:46 UTC (permalink / raw)
To: linux-arm-kernel-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r
Cc: kvmarm-FPEHb7Xf0XXUo1n7N8X6UoWGPAHP3yOg,
devicetree-u79uwXL29TY76Z2rM5mHXA, Will Deacon, Catalin Marinas,
Marc Zyngier, Christoffer Dall, Mark Rutland, Rob Herring,
Loc Ho, James Morse
The Software Delegated Exception Interface (SDEI) is an ARM standard
for registering callbacks from the platform firmware into the OS.
This is typically used to implement RAS notifications.
Such notifications enter the kernel at the registered entry-point
with the register values of the interrupted CPU context. Because this
is not a CPU exception, it cannot reuse the existing entry code.
(crucially we don't immediately know which exception level we interrupted),
Add entry.S asm to set us up for calling into C code. If the event
interrupted code that had interrupts masked, we always return to that
location. Otherwise we pretend this was an IRQ, and use SDEI's
complete_and_resume call to return to vbar_el1 + offset.
This allows the kernel to deliver signals to user space processes. For
KVM this triggers the world switch, a quick spin round vcpu_run, then
back into the guest, unless there are pending signals.
Add sdei_mask_local_cpu() calls to the smp_send_stop() code, this covers
the panic() code-path, which doesn't invoke cpuhotplug notifiers, and
only calls the panic notifiers if there is no kdump kernel registered.
Because we can interrupt entry-from/exit-to EL0, we can't trust the value
in sp_el0 even if we interrupted the kernel, in this case the code in
entry.S will save/restore sp_el0 and use the value in __entry_task.
Signed-off-by: James Morse <james.morse-5wv7dgnIgG8@public.gmane.org>
---
arch/arm64/Kconfig | 2 +-
arch/arm64/include/asm/sdei.h | 48 ++++++++++++++++
arch/arm64/kernel/Makefile | 1 +
arch/arm64/kernel/asm-offsets.c | 2 +
arch/arm64/kernel/sdei-entry.S | 92 +++++++++++++++++++++++++++++++
arch/arm64/kernel/sdei.c | 119 ++++++++++++++++++++++++++++++++++++++++
arch/arm64/kernel/smp.c | 7 +++
drivers/firmware/Kconfig | 1 +
drivers/firmware/arm_sdei.c | 1 +
include/linux/sdei.h | 2 +
10 files changed, 274 insertions(+), 1 deletion(-)
create mode 100644 arch/arm64/include/asm/sdei.h
create mode 100644 arch/arm64/kernel/sdei-entry.S
create mode 100644 arch/arm64/kernel/sdei.c
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index dfd908630631..d3913cffa3ac 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -97,7 +97,7 @@ config ARM64
select HAVE_IRQ_TIME_ACCOUNTING
select HAVE_MEMBLOCK
select HAVE_MEMBLOCK_NODE_MAP if NUMA
- select HAVE_NMI if ACPI_APEI_SEA
+ select HAVE_NMI
select HAVE_PATA_PLATFORM
select HAVE_PERF_EVENTS
select HAVE_PERF_REGS
diff --git a/arch/arm64/include/asm/sdei.h b/arch/arm64/include/asm/sdei.h
new file mode 100644
index 000000000000..9d1b2b44fedf
--- /dev/null
+++ b/arch/arm64/include/asm/sdei.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (C) 2017 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __ASM_SDEI_H
+#define __ASM_SDEI_H
+
+/* Values for sdei_exit_mode */
+#define SDEI_EXIT_HVC 0
+#define SDEI_EXIT_SMC 1
+
+#ifndef __ASSEMBLY__
+
+#include <linux/linkage.h>
+#include <linux/types.h>
+
+#include <asm/virt.h>
+
+extern unsigned long sdei_exit_mode;
+
+/* Software Delegated Exception entry point from firmware */
+asmlinkage void __sdei_asm_handler(unsigned long event_num, unsigned long arg,
+ unsigned long pc, unsigned long pstate);
+
+/*
+ * The above entry points do the minimum to call C code. This function does
+ * anything else, before calling the driver.
+ */
+struct sdei_registered_event;
+asmlinkage unsigned long __sdei_handler(struct pt_regs *regs,
+ struct sdei_registered_event *arg);
+
+extern unsigned long sdei_arch_get_entry_point(int conduit);
+#define sdei_arch_get_entry_point(x) sdei_arch_get_entry_point(x)
+
+#endif /* __ASSEMBLY__ */
+#endif /* __ASM_SDEI_H */
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index f2b4e816b6de..d22bae586ade 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -53,6 +53,7 @@ arm64-obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o \
arm64-obj-$(CONFIG_ARM64_RELOC_TEST) += arm64-reloc-test.o
arm64-reloc-test-y := reloc_test_core.o reloc_test_syms.o
arm64-obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
+arm64-obj-$(CONFIG_ARM_SDE_INTERFACE) += sdei.o sdei-entry.o
obj-y += $(arm64-obj-y) vdso/ probes/
obj-m += $(arm64-obj-m)
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index b3bb7ef97bc8..2dd036407046 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -22,6 +22,7 @@
#include <linux/mm.h>
#include <linux/dma-mapping.h>
#include <linux/kvm_host.h>
+#include <linux/sdei.h>
#include <linux/suspend.h>
#include <asm/cpufeature.h>
#include <asm/thread_info.h>
@@ -153,5 +154,6 @@ int main(void)
DEFINE(HIBERN_PBE_ADDR, offsetof(struct pbe, address));
DEFINE(HIBERN_PBE_NEXT, offsetof(struct pbe, next));
DEFINE(ARM64_FTR_SYSVAL, offsetof(struct arm64_ftr_reg, sys_val));
+ DEFINE(SDEI_EVENT_INTREGS, offsetof(struct sdei_registered_event, interrupted_regs));
return 0;
}
diff --git a/arch/arm64/kernel/sdei-entry.S b/arch/arm64/kernel/sdei-entry.S
new file mode 100644
index 000000000000..00a637eee155
--- /dev/null
+++ b/arch/arm64/kernel/sdei-entry.S
@@ -0,0 +1,92 @@
+/*
+ * Software Delegated Exception entry point from firmware to the kernel.
+ *
+ * Copyright (C) 2017 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/linkage.h>
+
+#include <asm/alternative.h>
+#include <asm/asm-offsets.h>
+#include <asm/assembler.h>
+#include <asm/ptrace.h>
+#include <asm/sdei.h>
+#include <uapi/linux/sdei.h>
+
+/*
+ * Software Delegated Exception entry point.
+ *
+ * x0: Event number
+ * x1: struct sdei_registered_event argument from registration time.
+ * x2: interrupted PC
+ * x3: interrupted PSTATE
+ *
+ * Firmware has preserved x0->x17 for us, we must save/restore the rest to
+ * follow SMC-CC. We save (or retrieve) all the registers as the handler may
+ * want them.
+ */
+ENTRY(__sdei_asm_handler)
+ stp x2, x3, [x1, #SDEI_EVENT_INTREGS + S_PC]
+ stp x4, x5, [x1, #SDEI_EVENT_INTREGS + 16 * 2]
+ stp x6, x7, [x1, #SDEI_EVENT_INTREGS + 16 * 3]
+ stp x8, x9, [x1, #SDEI_EVENT_INTREGS + 16 * 4]
+ stp x10, x11, [x1, #SDEI_EVENT_INTREGS + 16 * 5]
+ stp x12, x13, [x1, #SDEI_EVENT_INTREGS + 16 * 6]
+ stp x14, x15, [x1, #SDEI_EVENT_INTREGS + 16 * 7]
+ stp x16, x17, [x1, #SDEI_EVENT_INTREGS + 16 * 8]
+ stp x18, x19, [x1, #SDEI_EVENT_INTREGS + 16 * 9]
+ stp x20, x21, [x1, #SDEI_EVENT_INTREGS + 16 * 10]
+ stp x22, x23, [x1, #SDEI_EVENT_INTREGS + 16 * 11]
+ stp x24, x25, [x1, #SDEI_EVENT_INTREGS + 16 * 12]
+ stp x26, x27, [x1, #SDEI_EVENT_INTREGS + 16 * 13]
+ stp x28, x29, [x1, #SDEI_EVENT_INTREGS + 16 * 14]
+
+ mov x19, x1
+
+ /*
+ * We may have interrupted userspace, or a guest. Restore sp_el0 from
+ * __entry_task.
+ */
+ mrs x0, sp_el0
+ stp lr, x0, [x19, #SDEI_EVENT_INTREGS + S_LR]
+ ldr_this_cpu dst=x0, sym=__entry_task, tmp=x1
+ msr sp_el0, x0
+
+ add x0, x19, #SDEI_EVENT_INTREGS
+ mov x1, x19
+ bl __sdei_handler
+
+ /* restore regs >x17 that we clobbered */
+ ldp x28, x29, [x19, #SDEI_EVENT_INTREGS + 16 * 14]
+ ldp lr, x1, [x19, #SDEI_EVENT_INTREGS + S_LR]
+ msr sp_el0, x1
+ ldp x18, x19, [x19, #SDEI_EVENT_INTREGS + 16 * 9]
+
+ mov x1, x0 // address to complete_and_resume
+ /* x0 = (x0 <= 1) ? EVENT_COMPLETE:EVENT_COMPLETE_AND_RESUME */
+ cmp x0, #1
+ mov_q x2, SDEI_1_0_FN_SDEI_EVENT_COMPLETE
+ mov_q x3, SDEI_1_0_FN_SDEI_EVENT_COMPLETE_AND_RESUME
+ csel x0, x2, x3, ls
+
+ /* On success, this call never returns... */
+ ldr_l x2, sdei_exit_mode
+ cmp x2, #SDEI_EXIT_SMC
+ b.ne 1f
+ smc #0
+ b .
+1: hvc #0
+ b .
+ENDPROC(__sdei_asm_handler)
diff --git a/arch/arm64/kernel/sdei.c b/arch/arm64/kernel/sdei.c
new file mode 100644
index 000000000000..60693a9728bb
--- /dev/null
+++ b/arch/arm64/kernel/sdei.c
@@ -0,0 +1,119 @@
+/*
+ * Copyright (C) 2017 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/sched/task_stack.h>
+#include <linux/irqflags.h>
+#include <linux/hardirq.h>
+#include <linux/sdei.h>
+
+#include <asm/kprobes.h>
+#include <asm/ptrace.h>
+#include <asm/sysreg.h>
+
+unsigned long sdei_exit_mode;
+
+unsigned long sdei_arch_get_entry_point(int conduit)
+{
+ /*
+ * SDEI works between adjacent exception levels. If we booted at EL1 we
+ * assume a hypervisor is marshalling events. If we booted at EL2 and
+ * dropped to EL1 because we don't support VHE, then we can't support
+ * SDEI.
+ */
+ if (is_hyp_mode_available() && !is_kernel_in_hyp_mode()) {
+ pr_err("Not supported on this hardware/boot configuration\n");
+ return 0;
+ }
+
+ sdei_exit_mode = (conduit == CONDUIT_HVC) ? SDEI_EXIT_HVC : SDEI_EXIT_SMC;
+ return (unsigned long)__sdei_asm_handler;
+}
+
+
+/*
+ * __sdei_handler() returns one of:
+ * SDEI_EV_HANDLED - success, return to the interrupted context.
+ * SDEI_EV_FAILED - failure, return this error code to firmware.
+ * virtual-address - success, return to this address.
+ */
+static __kprobes unsigned long _sdei_handler(struct pt_regs *regs,
+ struct sdei_registered_event *arg)
+{
+ u32 mode;
+ int i, err = 0;
+ int clobbered_registers = 4;
+ u32 kernel_mode = read_sysreg(CurrentEL) | 1; /* +SPSel */
+ unsigned long vbar = read_sysreg(vbar_el1);
+
+ /* Retrieve the missing registers' values */
+ for (i = 0; i < clobbered_registers; i++) {
+ /* from within the handler, this call always succeeds */
+ sdei_api_event_context(i, &regs->regs[i]);
+ }
+
+ err = sdei_event_handler(regs, arg);
+ if (err)
+ return SDEI_EV_FAILED;
+
+ mode = regs->pstate & (PSR_MODE32_BIT | PSR_MODE_MASK);
+
+ /*
+ * If we interrupted the kernel with interrupts masked, we always go
+ * back to wherever we came from.
+ */
+ if (mode == kernel_mode && !interrupts_enabled(regs))
+ return SDEI_EV_HANDLED;
+
+ /*
+ * Otherwise, we pretend this was an IRQ. This lets user space tasks
+ * receive signals before we return to them, and KVM to invoke its
+ * world switch to do the same.
+ *
+ * See DDI0487B.a Table D1-7 'Vector offsets from vector table base
+ * address'.
+ */
+ if (mode == kernel_mode)
+ return vbar + 0x280;
+ else if (mode & PSR_MODE32_BIT)
+ return vbar + 0x680;
+
+ return vbar + 0x480;
+}
+
+
+asmlinkage __kprobes notrace unsigned long
+__sdei_handler(struct pt_regs *regs, struct sdei_registered_event *arg)
+{
+ unsigned long ret;
+ bool do_nmi_exit = false;
+
+ /*
+ * nmi_enter() deals with printk() re-entrance and use of RCU when
+ * RCU believed this CPU was idle. Because critical events can
+ * interrupt normal events, we may already be in_nmi().
+ */
+ if (!in_nmi()) {
+ nmi_enter();
+ do_nmi_exit = true;
+ }
+
+ ret = _sdei_handler(regs, arg);
+
+ if (do_nmi_exit)
+ nmi_exit();
+
+ return ret;
+}
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index dc66e6ec3a99..381b844aca6e 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -31,6 +31,7 @@
#include <linux/mm.h>
#include <linux/err.h>
#include <linux/cpu.h>
+#include <linux/sdei.h>
#include <linux/smp.h>
#include <linux/seq_file.h>
#include <linux/irq.h>
@@ -838,6 +839,7 @@ static void ipi_cpu_stop(unsigned int cpu)
set_cpu_online(cpu, false);
local_irq_disable();
+ sdei_mask_local_cpu();
while (1)
cpu_relax();
@@ -855,6 +857,7 @@ static void ipi_cpu_crash_stop(unsigned int cpu, struct pt_regs *regs)
atomic_dec(&waiting_for_crash_ipi);
local_irq_disable();
+ sdei_mask_local_cpu();
#ifdef CONFIG_HOTPLUG_CPU
if (cpu_ops[cpu]->cpu_die)
@@ -974,6 +977,8 @@ void smp_send_stop(void)
if (num_online_cpus() > 1)
pr_warning("SMP: failed to stop secondary CPUs %*pbl\n",
cpumask_pr_args(cpu_online_mask));
+
+ sdei_mask_local_cpu();
}
#ifdef CONFIG_KEXEC_CORE
@@ -1001,6 +1006,8 @@ void smp_send_crash_stop(void)
if (atomic_read(&waiting_for_crash_ipi) > 0)
pr_warning("SMP: failed to stop secondary CPUs %*pbl\n",
cpumask_pr_args(&mask));
+
+ sdei_mask_local_cpu();
}
bool smp_crash_stop_failed(void)
diff --git a/drivers/firmware/Kconfig b/drivers/firmware/Kconfig
index d8a9859f6102..1f6633b337aa 100644
--- a/drivers/firmware/Kconfig
+++ b/drivers/firmware/Kconfig
@@ -50,6 +50,7 @@ config ARM_SCPI_POWER_DOMAIN
config ARM_SDE_INTERFACE
bool "ARM Software Delegated Exception Interface (SDEI)"
+ depends on ARM64
help
The Software Delegated Exception Interface (SDEI) is an ARM
standard for registering callbacks from the platform firmware
diff --git a/drivers/firmware/arm_sdei.c b/drivers/firmware/arm_sdei.c
index 6155032c4958..74e84de5bf0b 100644
--- a/drivers/firmware/arm_sdei.c
+++ b/drivers/firmware/arm_sdei.c
@@ -154,6 +154,7 @@ int sdei_api_event_context(u32 query, u64 *result)
return invoke_sdei_fn(SDEI_1_0_FN_SDEI_EVENT_CONTEXT, query, 0, 0, 0, 0,
result);
}
+NOKPROBE_SYMBOL(sdei_api_event_context);
static int sdei_api_event_get_info(u32 event, u32 info, u64 *result)
{
diff --git a/include/linux/sdei.h b/include/linux/sdei.h
index 5393b0eb1afd..d19dbf2f29ca 100644
--- a/include/linux/sdei.h
+++ b/include/linux/sdei.h
@@ -32,6 +32,8 @@ enum sdei_conduit_types {
CONDUIT_HVC,
};
+#include <asm/sdei.h>
+
/* Arch code should override this to set the entry point from firmware... */
#ifndef sdei_arch_get_entry_point
#define sdei_arch_get_entry_point(conduit) (0)
--
2.13.3
--
To unsubscribe from this list: send the line "unsubscribe devicetree" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply related [flat|nested] 38+ messages in thread
* [PATCH v2 08/11] arm64: kernel: Add arch-specific SDEI entry code and CPU masking
@ 2017-08-08 16:46 ` James Morse
0 siblings, 0 replies; 38+ messages in thread
From: James Morse @ 2017-08-08 16:46 UTC (permalink / raw)
To: linux-arm-kernel
The Software Delegated Exception Interface (SDEI) is an ARM standard
for registering callbacks from the platform firmware into the OS.
This is typically used to implement RAS notifications.
Such notifications enter the kernel at the registered entry-point
with the register values of the interrupted CPU context. Because this
is not a CPU exception, it cannot reuse the existing entry code.
(crucially we don't immediately know which exception level we interrupted).
Add entry.S asm to set us up for calling into C code. If the event
interrupted code that had interrupts masked, we always return to that
location. Otherwise we pretend this was an IRQ, and use SDEI's
complete_and_resume call to return to vbar_el1 + offset.
This allows the kernel to deliver signals to user space processes. For
KVM this triggers the world switch, a quick spin round vcpu_run, then
back into the guest, unless there are pending signals.
Add sdei_mask_local_cpu() calls to the smp_send_stop() code, this covers
the panic() code-path, which doesn't invoke cpuhotplug notifiers, and
only calls the panic notifiers if there is no kdump kernel registered.
Because we can interrupt entry-from/exit-to EL0, we can't trust the value
in sp_el0 even if we interrupted the kernel, in this case the code in
entry.S will save/restore sp_el0 and use the value in __entry_task.
Signed-off-by: James Morse <james.morse@arm.com>
---
arch/arm64/Kconfig | 2 +-
arch/arm64/include/asm/sdei.h | 48 ++++++++++++++++
arch/arm64/kernel/Makefile | 1 +
arch/arm64/kernel/asm-offsets.c | 2 +
arch/arm64/kernel/sdei-entry.S | 92 +++++++++++++++++++++++++++++++
arch/arm64/kernel/sdei.c | 119 ++++++++++++++++++++++++++++++++++++++++
arch/arm64/kernel/smp.c | 7 +++
drivers/firmware/Kconfig | 1 +
drivers/firmware/arm_sdei.c | 1 +
include/linux/sdei.h | 2 +
10 files changed, 274 insertions(+), 1 deletion(-)
create mode 100644 arch/arm64/include/asm/sdei.h
create mode 100644 arch/arm64/kernel/sdei-entry.S
create mode 100644 arch/arm64/kernel/sdei.c
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index dfd908630631..d3913cffa3ac 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -97,7 +97,7 @@ config ARM64
select HAVE_IRQ_TIME_ACCOUNTING
select HAVE_MEMBLOCK
select HAVE_MEMBLOCK_NODE_MAP if NUMA
- select HAVE_NMI if ACPI_APEI_SEA
+ select HAVE_NMI
select HAVE_PATA_PLATFORM
select HAVE_PERF_EVENTS
select HAVE_PERF_REGS
diff --git a/arch/arm64/include/asm/sdei.h b/arch/arm64/include/asm/sdei.h
new file mode 100644
index 000000000000..9d1b2b44fedf
--- /dev/null
+++ b/arch/arm64/include/asm/sdei.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (C) 2017 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __ASM_SDEI_H
+#define __ASM_SDEI_H
+
+/* Values for sdei_exit_mode */
+#define SDEI_EXIT_HVC 0
+#define SDEI_EXIT_SMC 1
+
+#ifndef __ASSEMBLY__
+
+#include <linux/linkage.h>
+#include <linux/types.h>
+
+#include <asm/virt.h>
+
+extern unsigned long sdei_exit_mode;
+
+/* Software Delegated Exception entry point from firmware */
+asmlinkage void __sdei_asm_handler(unsigned long event_num, unsigned long arg,
+ unsigned long pc, unsigned long pstate);
+
+/*
+ * The above entry points do the minimum to call C code. This function does
+ * anything else, before calling the driver.
+ */
+struct sdei_registered_event;
+asmlinkage unsigned long __sdei_handler(struct pt_regs *regs,
+ struct sdei_registered_event *arg);
+
+extern unsigned long sdei_arch_get_entry_point(int conduit);
+#define sdei_arch_get_entry_point(x) sdei_arch_get_entry_point(x)
+
+#endif /* __ASSEMBLY__ */
+#endif /* __ASM_SDEI_H */
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index f2b4e816b6de..d22bae586ade 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -53,6 +53,7 @@ arm64-obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o \
arm64-obj-$(CONFIG_ARM64_RELOC_TEST) += arm64-reloc-test.o
arm64-reloc-test-y := reloc_test_core.o reloc_test_syms.o
arm64-obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
+arm64-obj-$(CONFIG_ARM_SDE_INTERFACE) += sdei.o sdei-entry.o
obj-y += $(arm64-obj-y) vdso/ probes/
obj-m += $(arm64-obj-m)
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index b3bb7ef97bc8..2dd036407046 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -22,6 +22,7 @@
#include <linux/mm.h>
#include <linux/dma-mapping.h>
#include <linux/kvm_host.h>
+#include <linux/sdei.h>
#include <linux/suspend.h>
#include <asm/cpufeature.h>
#include <asm/thread_info.h>
@@ -153,5 +154,6 @@ int main(void)
DEFINE(HIBERN_PBE_ADDR, offsetof(struct pbe, address));
DEFINE(HIBERN_PBE_NEXT, offsetof(struct pbe, next));
DEFINE(ARM64_FTR_SYSVAL, offsetof(struct arm64_ftr_reg, sys_val));
+ DEFINE(SDEI_EVENT_INTREGS, offsetof(struct sdei_registered_event, interrupted_regs));
return 0;
}
diff --git a/arch/arm64/kernel/sdei-entry.S b/arch/arm64/kernel/sdei-entry.S
new file mode 100644
index 000000000000..00a637eee155
--- /dev/null
+++ b/arch/arm64/kernel/sdei-entry.S
@@ -0,0 +1,92 @@
+/*
+ * Software Delegated Exception entry point from firmware to the kernel.
+ *
+ * Copyright (C) 2017 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/linkage.h>
+
+#include <asm/alternative.h>
+#include <asm/asm-offsets.h>
+#include <asm/assembler.h>
+#include <asm/ptrace.h>
+#include <asm/sdei.h>
+#include <uapi/linux/sdei.h>
+
+/*
+ * Software Delegated Exception entry point.
+ *
+ * x0: Event number
+ * x1: struct sdei_registered_event argument from registration time.
+ * x2: interrupted PC
+ * x3: interrupted PSTATE
+ *
+ * Firmware has preserved x0->x17 for us, we must save/restore the rest to
+ * follow SMC-CC. We save (or retrieve) all the registers as the handler may
+ * want them.
+ */
+ENTRY(__sdei_asm_handler)
+ stp x2, x3, [x1, #SDEI_EVENT_INTREGS + S_PC]
+ stp x4, x5, [x1, #SDEI_EVENT_INTREGS + 16 * 2]
+ stp x6, x7, [x1, #SDEI_EVENT_INTREGS + 16 * 3]
+ stp x8, x9, [x1, #SDEI_EVENT_INTREGS + 16 * 4]
+ stp x10, x11, [x1, #SDEI_EVENT_INTREGS + 16 * 5]
+ stp x12, x13, [x1, #SDEI_EVENT_INTREGS + 16 * 6]
+ stp x14, x15, [x1, #SDEI_EVENT_INTREGS + 16 * 7]
+ stp x16, x17, [x1, #SDEI_EVENT_INTREGS + 16 * 8]
+ stp x18, x19, [x1, #SDEI_EVENT_INTREGS + 16 * 9]
+ stp x20, x21, [x1, #SDEI_EVENT_INTREGS + 16 * 10]
+ stp x22, x23, [x1, #SDEI_EVENT_INTREGS + 16 * 11]
+ stp x24, x25, [x1, #SDEI_EVENT_INTREGS + 16 * 12]
+ stp x26, x27, [x1, #SDEI_EVENT_INTREGS + 16 * 13]
+ stp x28, x29, [x1, #SDEI_EVENT_INTREGS + 16 * 14]
+
+ mov x19, x1
+
+ /*
+ * We may have interrupted userspace, or a guest. Restore sp_el0 from
+ * __entry_task.
+ */
+ mrs x0, sp_el0
+ stp lr, x0, [x19, #SDEI_EVENT_INTREGS + S_LR]
+ ldr_this_cpu dst=x0, sym=__entry_task, tmp=x1
+ msr sp_el0, x0
+
+ add x0, x19, #SDEI_EVENT_INTREGS
+ mov x1, x19
+ bl __sdei_handler
+
+ /* restore regs >x17 that we clobbered */
+ ldp x28, x29, [x19, #SDEI_EVENT_INTREGS + 16 * 14]
+ ldp lr, x1, [x19, #SDEI_EVENT_INTREGS + S_LR]
+ msr sp_el0, x1
+ ldp x18, x19, [x19, #SDEI_EVENT_INTREGS + 16 * 9]
+
+ mov x1, x0 // address to complete_and_resume
+ /* x0 = (x0 <= 1) ? EVENT_COMPLETE:EVENT_COMPLETE_AND_RESUME */
+ cmp x0, #1
+ mov_q x2, SDEI_1_0_FN_SDEI_EVENT_COMPLETE
+ mov_q x3, SDEI_1_0_FN_SDEI_EVENT_COMPLETE_AND_RESUME
+ csel x0, x2, x3, ls
+
+ /* On success, this call never returns... */
+ ldr_l x2, sdei_exit_mode
+ cmp x2, #SDEI_EXIT_SMC
+ b.ne 1f
+ smc #0
+ b .
+1: hvc #0
+ b .
+ENDPROC(__sdei_asm_handler)
diff --git a/arch/arm64/kernel/sdei.c b/arch/arm64/kernel/sdei.c
new file mode 100644
index 000000000000..60693a9728bb
--- /dev/null
+++ b/arch/arm64/kernel/sdei.c
@@ -0,0 +1,119 @@
+/*
+ * Copyright (C) 2017 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/sched/task_stack.h>
+#include <linux/irqflags.h>
+#include <linux/hardirq.h>
+#include <linux/sdei.h>
+
+#include <asm/kprobes.h>
+#include <asm/ptrace.h>
+#include <asm/sysreg.h>
+
+unsigned long sdei_exit_mode;
+
+unsigned long sdei_arch_get_entry_point(int conduit)
+{
+ /*
+ * SDEI works between adjacent exception levels. If we booted at EL1 we
+ * assume a hypervisor is marshalling events. If we booted at EL2 and
+ * dropped to EL1 because we don't support VHE, then we can't support
+ * SDEI.
+ */
+ if (is_hyp_mode_available() && !is_kernel_in_hyp_mode()) {
+ pr_err("Not supported on this hardware/boot configuration\n");
+ return 0;
+ }
+
+ sdei_exit_mode = (conduit == CONDUIT_HVC) ? SDEI_EXIT_HVC : SDEI_EXIT_SMC;
+ return (unsigned long)__sdei_asm_handler;
+}
+
+
+/*
+ * __sdei_handler() returns one of:
+ * SDEI_EV_HANDLED - success, return to the interrupted context.
+ * SDEI_EV_FAILED - failure, return this error code to firmware.
+ * virtual-address - success, return to this address.
+ */
+static __kprobes unsigned long _sdei_handler(struct pt_regs *regs,
+ struct sdei_registered_event *arg)
+{
+ u32 mode;
+ int i, err = 0;
+ int clobbered_registers = 4;
+ u32 kernel_mode = read_sysreg(CurrentEL) | 1; /* +SPSel */
+ unsigned long vbar = read_sysreg(vbar_el1);
+
+ /* Retrieve the missing registers' values */
+ for (i = 0; i < clobbered_registers; i++) {
+ /* from within the handler, this call always succeeds */
+ sdei_api_event_context(i, &regs->regs[i]);
+ }
+
+ err = sdei_event_handler(regs, arg);
+ if (err)
+ return SDEI_EV_FAILED;
+
+ mode = regs->pstate & (PSR_MODE32_BIT | PSR_MODE_MASK);
+
+ /*
+ * If we interrupted the kernel with interrupts masked, we always go
+ * back to wherever we came from.
+ */
+ if (mode == kernel_mode && !interrupts_enabled(regs))
+ return SDEI_EV_HANDLED;
+
+ /*
+ * Otherwise, we pretend this was an IRQ. This lets user space tasks
+ * receive signals before we return to them, and KVM to invoke its
+ * world switch to do the same.
+ *
+ * See DDI0487B.a Table D1-7 'Vector offsets from vector table base
+ * address'.
+ */
+ if (mode == kernel_mode)
+ return vbar + 0x280;
+ else if (mode & PSR_MODE32_BIT)
+ return vbar + 0x680;
+
+ return vbar + 0x480;
+}
+
+
+asmlinkage __kprobes notrace unsigned long
+__sdei_handler(struct pt_regs *regs, struct sdei_registered_event *arg)
+{
+ unsigned long ret;
+ bool do_nmi_exit = false;
+
+ /*
+ * nmi_enter() deals with printk() re-entrance and use of RCU when
+ * RCU believed this CPU was idle. Because critical events can
+ * interrupt normal events, we may already be in_nmi().
+ */
+ if (!in_nmi()) {
+ nmi_enter();
+ do_nmi_exit = true;
+ }
+
+ ret = _sdei_handler(regs, arg);
+
+ if (do_nmi_exit)
+ nmi_exit();
+
+ return ret;
+}
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index dc66e6ec3a99..381b844aca6e 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -31,6 +31,7 @@
#include <linux/mm.h>
#include <linux/err.h>
#include <linux/cpu.h>
+#include <linux/sdei.h>
#include <linux/smp.h>
#include <linux/seq_file.h>
#include <linux/irq.h>
@@ -838,6 +839,7 @@ static void ipi_cpu_stop(unsigned int cpu)
set_cpu_online(cpu, false);
local_irq_disable();
+ sdei_mask_local_cpu();
while (1)
cpu_relax();
@@ -855,6 +857,7 @@ static void ipi_cpu_crash_stop(unsigned int cpu, struct pt_regs *regs)
atomic_dec(&waiting_for_crash_ipi);
local_irq_disable();
+ sdei_mask_local_cpu();
#ifdef CONFIG_HOTPLUG_CPU
if (cpu_ops[cpu]->cpu_die)
@@ -974,6 +977,8 @@ void smp_send_stop(void)
if (num_online_cpus() > 1)
pr_warning("SMP: failed to stop secondary CPUs %*pbl\n",
cpumask_pr_args(cpu_online_mask));
+
+ sdei_mask_local_cpu();
}
#ifdef CONFIG_KEXEC_CORE
@@ -1001,6 +1006,8 @@ void smp_send_crash_stop(void)
if (atomic_read(&waiting_for_crash_ipi) > 0)
pr_warning("SMP: failed to stop secondary CPUs %*pbl\n",
cpumask_pr_args(&mask));
+
+ sdei_mask_local_cpu();
}
bool smp_crash_stop_failed(void)
diff --git a/drivers/firmware/Kconfig b/drivers/firmware/Kconfig
index d8a9859f6102..1f6633b337aa 100644
--- a/drivers/firmware/Kconfig
+++ b/drivers/firmware/Kconfig
@@ -50,6 +50,7 @@ config ARM_SCPI_POWER_DOMAIN
config ARM_SDE_INTERFACE
bool "ARM Software Delegated Exception Interface (SDEI)"
+ depends on ARM64
help
The Software Delegated Exception Interface (SDEI) is an ARM
standard for registering callbacks from the platform firmware
diff --git a/drivers/firmware/arm_sdei.c b/drivers/firmware/arm_sdei.c
index 6155032c4958..74e84de5bf0b 100644
--- a/drivers/firmware/arm_sdei.c
+++ b/drivers/firmware/arm_sdei.c
@@ -154,6 +154,7 @@ int sdei_api_event_context(u32 query, u64 *result)
return invoke_sdei_fn(SDEI_1_0_FN_SDEI_EVENT_CONTEXT, query, 0, 0, 0, 0,
result);
}
+NOKPROBE_SYMBOL(sdei_api_event_context);
static int sdei_api_event_get_info(u32 event, u32 info, u64 *result)
{
diff --git a/include/linux/sdei.h b/include/linux/sdei.h
index 5393b0eb1afd..d19dbf2f29ca 100644
--- a/include/linux/sdei.h
+++ b/include/linux/sdei.h
@@ -32,6 +32,8 @@ enum sdei_conduit_types {
CONDUIT_HVC,
};
+#include <asm/sdei.h>
+
/* Arch code should override this to set the entry point from firmware... */
#ifndef sdei_arch_get_entry_point
#define sdei_arch_get_entry_point(conduit) (0)
--
2.13.3
^ permalink raw reply related [flat|nested] 38+ messages in thread
* [PATCH v2 10/11] firmware: arm_sdei: add support for CPU private events
2017-08-08 16:46 ` James Morse
@ 2017-08-08 16:46 ` James Morse
-1 siblings, 0 replies; 38+ messages in thread
From: James Morse @ 2017-08-08 16:46 UTC (permalink / raw)
To: linux-arm-kernel-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r
Cc: kvmarm-FPEHb7Xf0XXUo1n7N8X6UoWGPAHP3yOg,
devicetree-u79uwXL29TY76Z2rM5mHXA, Will Deacon, Catalin Marinas,
Marc Zyngier, Christoffer Dall, Mark Rutland, Rob Herring,
Loc Ho, James Morse
Private SDE events are per-cpu, and need to be registered and enabled
on each CPU.
Hide this detail from the caller by adapting our {,un}register and
{en,dis}able calls to send an IPI to each CPU if the event is private.
CPU private events are unregistered when the CPU is powered-off, and
re-registered when the CPU is brought back online. This saves bringing
secondary cores back online to call private_reset() on shutdown, kexec
and resume from hibernate.
Signed-off-by: James Morse <james.morse-5wv7dgnIgG8@public.gmane.org>
---
Changes since v1:
* Made unregister on cpu-down print an error and continue as returning an error
from here is ignored.
drivers/firmware/arm_sdei.c | 241 +++++++++++++++++++++++++++++++++++++++++---
1 file changed, 227 insertions(+), 14 deletions(-)
diff --git a/drivers/firmware/arm_sdei.c b/drivers/firmware/arm_sdei.c
index abe032560663..67f941246711 100644
--- a/drivers/firmware/arm_sdei.c
+++ b/drivers/firmware/arm_sdei.c
@@ -20,9 +20,11 @@
#include <linux/acpi.h>
#include <linux/arm-smccc.h>
+#include <linux/atomic.h>
#include <linux/bitops.h>
#include <linux/compiler.h>
#include <linux/cpuhotplug.h>
+#include <linux/cpu.h>
#include <linux/cpu_pm.h>
#include <linux/errno.h>
#include <linux/hardirq.h>
@@ -65,12 +67,49 @@ struct sdei_event {
u64 priority;
/* This pointer is handed to firmware as the event argument. */
- struct sdei_registered_event *registered;
+ union {
+ /* Shared events */
+ struct sdei_registered_event *registered;
+
+ /* CPU private events */
+ struct sdei_registered_event __percpu *private_registered;
+ };
};
static LIST_HEAD(sdei_events);
static DEFINE_SPINLOCK(sdei_events_lock);
+/* When frozen, cpu-hotplug notifiers shouldn't unregister/re-register events */
+static bool frozen;
+
+/* Private events are registered/enabled via IPI passing one of these */
+struct sdei_crosscall_args {
+ struct sdei_event *event;
+ atomic_t errors;
+ int first_error;
+};
+
+#define CROSSCALL_INIT(arg, event) (arg.event = event, \
+ arg.first_error = 0, \
+ atomic_set(&arg.errors, 0))
+
+static inline int sdei_do_cross_call(void *fn, struct sdei_event *event)
+{
+ struct sdei_crosscall_args arg;
+
+ CROSSCALL_INIT(arg, event);
+ on_each_cpu(fn, &arg, true);
+
+ return arg.first_error;
+}
+
+static inline void
+sdei_cross_call_return(struct sdei_crosscall_args *arg, int err)
+{
+ if (err && (atomic_inc_return(&arg->errors) == 1))
+ arg->first_error = err;
+}
+
static int sdei_to_linux_errno(unsigned long sdei_err)
{
switch (sdei_err) {
@@ -210,6 +249,25 @@ static struct sdei_event *sdei_event_create(u32 event_num,
reg->callback = cb;
reg->callback_arg = cb_arg;
event->registered = reg;
+ } else {
+ int cpu;
+ struct sdei_registered_event __percpu *regs;
+
+ regs = alloc_percpu(struct sdei_registered_event);
+ if (!regs) {
+ kfree(event);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ for_each_possible_cpu(cpu) {
+ reg = per_cpu_ptr(regs, cpu);
+
+ reg->event_num = event->event_num;
+ reg->callback = cb;
+ reg->callback_arg = cb_arg;
+ }
+
+ event->private_registered = regs;
}
if (sdei_event_find(event_num)) {
@@ -231,6 +289,8 @@ static void sdei_event_destroy(struct sdei_event *event)
if (event->type == SDEI_EVENT_TYPE_SHARED)
kfree(event->registered);
+ else
+ free_percpu(event->private_registered);
kfree(event);
}
@@ -261,11 +321,6 @@ static void _ipi_mask_cpu(void *ignored)
sdei_mask_local_cpu();
}
-static int sdei_cpuhp_down(unsigned int ignored)
-{
- return sdei_mask_local_cpu();
-}
-
int sdei_unmask_local_cpu(void)
{
int err;
@@ -287,11 +342,6 @@ static void _ipi_unmask_cpu(void *ignored)
sdei_unmask_local_cpu();
}
-static int sdei_cpuhp_up(unsigned int ignored)
-{
- return sdei_unmask_local_cpu();
-}
-
static void _ipi_private_reset(void *ignored)
{
int err;
@@ -342,6 +392,16 @@ static int sdei_api_event_enable(u32 event_num)
0, NULL);
}
+static void _ipi_event_enable(void *data)
+{
+ int err;
+ struct sdei_crosscall_args *arg = data;
+
+ err = sdei_api_event_enable(arg->event->event_num);
+
+ sdei_cross_call_return(arg, err);
+}
+
int sdei_event_enable(u32 event_num)
{
int err = -EINVAL;
@@ -353,6 +413,8 @@ int sdei_event_enable(u32 event_num)
err = -ENOENT;
else if (event->type == SDEI_EVENT_TYPE_SHARED)
err = sdei_api_event_enable(event->event_num);
+ else
+ err = sdei_do_cross_call(_ipi_event_enable, event);
spin_unlock(&sdei_events_lock);
return err;
@@ -365,6 +427,16 @@ static int sdei_api_event_disable(u32 event_num)
0, 0, NULL);
}
+static void _ipi_event_disable(void *data)
+{
+ int err;
+ struct sdei_crosscall_args *arg = data;
+
+ err = sdei_api_event_disable(arg->event->event_num);
+
+ sdei_cross_call_return(arg, err);
+}
+
int sdei_event_disable(u32 event_num)
{
int err = -EINVAL;
@@ -376,6 +448,9 @@ int sdei_event_disable(u32 event_num)
err = -ENOENT;
else if (event->type == SDEI_EVENT_TYPE_SHARED)
err = sdei_api_event_disable(event->event_num);
+ else
+ err = sdei_do_cross_call(_ipi_event_disable, event);
+
spin_unlock(&sdei_events_lock);
return err;
@@ -388,6 +463,27 @@ static int sdei_api_event_unregister(u32 event_num)
0, 0, 0, NULL);
}
+static void _local_event_unregister(void *data)
+{
+ int err;
+ u64 result;
+ struct sdei_registered_event *reg;
+ struct sdei_crosscall_args *arg = data;
+
+ WARN_ON_ONCE(preemptible());
+ lockdep_assert_held(&sdei_events_lock);
+
+ reg = per_cpu_ptr(arg->event->private_registered, smp_processor_id());
+ err = sdei_api_event_status(reg->event_num, &result);
+ if (!err) {
+ reg->was_enabled = !!(result & BIT(SDEI_EVENT_STATUS_ENABLED));
+
+ err = sdei_api_event_unregister(arg->event->event_num);
+ }
+
+ sdei_cross_call_return(arg, err);
+}
+
static int _sdei_event_unregister(struct sdei_event *event)
{
int err;
@@ -408,7 +504,7 @@ static int _sdei_event_unregister(struct sdei_event *event)
return sdei_api_event_unregister(event->event_num);
}
- return -EINVAL;
+ return sdei_do_cross_call(_local_event_unregister, event);
}
int sdei_event_unregister(u32 event_num)
@@ -467,15 +563,41 @@ static int sdei_api_event_register(u32 event_num, unsigned long entry_point,
flags, affinity, NULL);
}
+static void _local_event_register(void *data)
+{
+ int err;
+ struct sdei_registered_event *reg;
+ struct sdei_crosscall_args *arg = data;
+
+ WARN_ON(preemptible());
+ lockdep_assert_held(&sdei_events_lock);
+
+ reg = per_cpu_ptr(arg->event->private_registered, smp_processor_id());
+ err = sdei_api_event_register(arg->event->event_num, sdei_entry_point,
+ reg, 0, 0);
+
+ sdei_cross_call_return(arg, err);
+}
+
static int _sdei_event_register(struct sdei_event *event)
{
+ int err;
+
if (event->type == SDEI_EVENT_TYPE_SHARED)
return sdei_api_event_register(event->event_num,
sdei_entry_point,
event->registered,
SDEI_EVENT_REGISTER_RM_ANY, 0);
- return -EINVAL;
+ get_online_cpus();
+
+ err = sdei_do_cross_call(_local_event_register, event);
+ if (err)
+ sdei_do_cross_call(_local_event_unregister, event);
+
+ put_online_cpus();
+
+ return err;
}
int sdei_event_register(u32 event_num, sdei_event_callback *cb, void *arg,
@@ -515,6 +637,22 @@ int sdei_event_register(u32 event_num, sdei_event_callback *cb, void *arg,
}
EXPORT_SYMBOL(sdei_event_register);
+static void _local_event_reenable(void *data)
+{
+ int err = 0;
+ struct sdei_registered_event *reg;
+ struct sdei_crosscall_args *arg = data;
+
+ WARN_ON_ONCE(preemptible());
+ lockdep_assert_held(&sdei_events_lock);
+
+ reg = per_cpu_ptr(arg->event->private_registered, smp_processor_id());
+ if (reg->was_enabled)
+ err = sdei_api_event_enable(arg->event->event_num);
+
+ sdei_cross_call_return(arg, err);
+}
+
static int sdei_reregister_event(struct sdei_event *event)
{
int err;
@@ -531,6 +669,8 @@ static int sdei_reregister_event(struct sdei_event *event)
if (event->type == SDEI_EVENT_TYPE_SHARED) {
if (event->registered->was_enabled)
err = sdei_api_event_enable(event->event_num);
+ } else {
+ err = sdei_do_cross_call(_local_event_reenable, event);
}
if (err)
@@ -555,6 +695,68 @@ static int sdei_reregister_events(void)
return err;
}
+static int sdei_cpuhp_down(unsigned int cpu)
+{
+ struct sdei_event *event;
+ struct sdei_crosscall_args arg;
+
+ if (frozen) {
+ /* All events unregistered */
+ return sdei_mask_local_cpu();
+ }
+
+ /* un-register private events */
+ spin_lock(&sdei_events_lock);
+ list_for_each_entry(event, &sdei_events, list) {
+ if (event->type == SDEI_EVENT_TYPE_SHARED)
+ continue;
+
+ CROSSCALL_INIT(arg, event);
+ /* call the cross-call function locally... */
+ _local_event_unregister(&arg);
+ if (arg.first_error)
+ pr_err("Failed to unregister event %u: %d\n",
+ event->event_num, arg.first_error);
+ }
+ spin_unlock(&sdei_events_lock);
+
+ return sdei_mask_local_cpu();
+}
+
+static int sdei_cpuhp_up(unsigned int cpu)
+{
+ struct sdei_event *event;
+ struct sdei_crosscall_args arg;
+
+ if (frozen) {
+ /* Events will be re-registered when we thaw. */
+ return sdei_unmask_local_cpu();
+ }
+
+ /* re-register/enable private events */
+ spin_lock(&sdei_events_lock);
+ list_for_each_entry(event, &sdei_events, list) {
+ if (event->type == SDEI_EVENT_TYPE_SHARED)
+ continue;
+
+ CROSSCALL_INIT(arg, event);
+ /* call the cross-call function locally... */
+ _local_event_register(&arg);
+ if (arg.first_error)
+ pr_err("Failed to re-register event %u: %d\n",
+ event->event_num, arg.first_error);
+
+ CROSSCALL_INIT(arg, event);
+ _local_event_reenable(&arg);
+ if (arg.first_error)
+ pr_err("Failed to re-enable event %u: %d\n",
+ event->event_num, arg.first_error);
+ }
+ spin_unlock(&sdei_events_lock);
+
+ return sdei_unmask_local_cpu();
+}
+
/* When entering idle, mask/unmask events for this cpu */
static int sdei_pm_notifier(struct notifier_block *nb, unsigned long action,
void *data)
@@ -607,6 +809,7 @@ static int sdei_device_freeze(struct device *dev)
{
int err;
+ frozen = true;
err = sdei_event_unregister_all();
if (err)
return err;
@@ -616,9 +819,13 @@ static int sdei_device_freeze(struct device *dev)
static int sdei_device_thaw(struct device *dev)
{
+ int err;
+
sdei_device_resume(dev);
- return sdei_reregister_events();
+ err = sdei_reregister_events();
+ frozen = false;
+ return err;
}
static int sdei_device_restore(struct device *dev)
@@ -650,6 +857,12 @@ static int sdei_reboot_notifier(struct notifier_block *nb, unsigned long action,
sdei_platform_reset();
+ /*
+ * There is now no point trying to unregister private events if we go on
+ * to take CPUs offline.
+ */
+ frozen = true;
+
return NOTIFY_OK;
}
--
2.13.3
--
To unsubscribe from this list: send the line "unsubscribe devicetree" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply related [flat|nested] 38+ messages in thread
* [PATCH v2 10/11] firmware: arm_sdei: add support for CPU private events
@ 2017-08-08 16:46 ` James Morse
0 siblings, 0 replies; 38+ messages in thread
From: James Morse @ 2017-08-08 16:46 UTC (permalink / raw)
To: linux-arm-kernel
Private SDEI events are per-cpu, and need to be registered and enabled
on each CPU.
Hide this detail from the caller by adapting our {,un}register and
{en,dis}able calls to send an IPI to each CPU if the event is private.
CPU private events are unregistered when the CPU is powered-off, and
re-registered when the CPU is brought back online. This saves bringing
secondary cores back online to call private_reset() on shutdown, kexec
and resume from hibernate.
Signed-off-by: James Morse <james.morse@arm.com>
---
Changes since v1:
* Made unregister on cpu-down print an error and continue as returning an error
from here is ignored.
drivers/firmware/arm_sdei.c | 241 +++++++++++++++++++++++++++++++++++++++++---
1 file changed, 227 insertions(+), 14 deletions(-)
diff --git a/drivers/firmware/arm_sdei.c b/drivers/firmware/arm_sdei.c
index abe032560663..67f941246711 100644
--- a/drivers/firmware/arm_sdei.c
+++ b/drivers/firmware/arm_sdei.c
@@ -20,9 +20,11 @@
#include <linux/acpi.h>
#include <linux/arm-smccc.h>
+#include <linux/atomic.h>
#include <linux/bitops.h>
#include <linux/compiler.h>
#include <linux/cpuhotplug.h>
+#include <linux/cpu.h>
#include <linux/cpu_pm.h>
#include <linux/errno.h>
#include <linux/hardirq.h>
@@ -65,12 +67,49 @@ struct sdei_event {
u64 priority;
/* This pointer is handed to firmware as the event argument. */
- struct sdei_registered_event *registered;
+ union {
+ /* Shared events */
+ struct sdei_registered_event *registered;
+
+ /* CPU private events */
+ struct sdei_registered_event __percpu *private_registered;
+ };
};
static LIST_HEAD(sdei_events);
static DEFINE_SPINLOCK(sdei_events_lock);
+/* When frozen, cpu-hotplug notifiers shouldn't unregister/re-register events */
+static bool frozen;
+
+/* Private events are registered/enabled via IPI passing one of these */
+struct sdei_crosscall_args {
+ struct sdei_event *event;
+ atomic_t errors;
+ int first_error;
+};
+
+#define CROSSCALL_INIT(arg, event) (arg.event = event, \
+ arg.first_error = 0, \
+ atomic_set(&arg.errors, 0))
+
+static inline int sdei_do_cross_call(void *fn, struct sdei_event *event)
+{
+ struct sdei_crosscall_args arg;
+
+ CROSSCALL_INIT(arg, event);
+ on_each_cpu(fn, &arg, true);
+
+ return arg.first_error;
+}
+
+static inline void
+sdei_cross_call_return(struct sdei_crosscall_args *arg, int err)
+{
+ if (err && (atomic_inc_return(&arg->errors) == 1))
+ arg->first_error = err;
+}
+
static int sdei_to_linux_errno(unsigned long sdei_err)
{
switch (sdei_err) {
@@ -210,6 +249,25 @@ static struct sdei_event *sdei_event_create(u32 event_num,
reg->callback = cb;
reg->callback_arg = cb_arg;
event->registered = reg;
+ } else {
+ int cpu;
+ struct sdei_registered_event __percpu *regs;
+
+ regs = alloc_percpu(struct sdei_registered_event);
+ if (!regs) {
+ kfree(event);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ for_each_possible_cpu(cpu) {
+ reg = per_cpu_ptr(regs, cpu);
+
+ reg->event_num = event->event_num;
+ reg->callback = cb;
+ reg->callback_arg = cb_arg;
+ }
+
+ event->private_registered = regs;
}
if (sdei_event_find(event_num)) {
@@ -231,6 +289,8 @@ static void sdei_event_destroy(struct sdei_event *event)
if (event->type == SDEI_EVENT_TYPE_SHARED)
kfree(event->registered);
+ else
+ free_percpu(event->private_registered);
kfree(event);
}
@@ -261,11 +321,6 @@ static void _ipi_mask_cpu(void *ignored)
sdei_mask_local_cpu();
}
-static int sdei_cpuhp_down(unsigned int ignored)
-{
- return sdei_mask_local_cpu();
-}
-
int sdei_unmask_local_cpu(void)
{
int err;
@@ -287,11 +342,6 @@ static void _ipi_unmask_cpu(void *ignored)
sdei_unmask_local_cpu();
}
-static int sdei_cpuhp_up(unsigned int ignored)
-{
- return sdei_unmask_local_cpu();
-}
-
static void _ipi_private_reset(void *ignored)
{
int err;
@@ -342,6 +392,16 @@ static int sdei_api_event_enable(u32 event_num)
0, NULL);
}
+static void _ipi_event_enable(void *data)
+{
+ int err;
+ struct sdei_crosscall_args *arg = data;
+
+ err = sdei_api_event_enable(arg->event->event_num);
+
+ sdei_cross_call_return(arg, err);
+}
+
int sdei_event_enable(u32 event_num)
{
int err = -EINVAL;
@@ -353,6 +413,8 @@ int sdei_event_enable(u32 event_num)
err = -ENOENT;
else if (event->type == SDEI_EVENT_TYPE_SHARED)
err = sdei_api_event_enable(event->event_num);
+ else
+ err = sdei_do_cross_call(_ipi_event_enable, event);
spin_unlock(&sdei_events_lock);
return err;
@@ -365,6 +427,16 @@ static int sdei_api_event_disable(u32 event_num)
0, 0, NULL);
}
+static void _ipi_event_disable(void *data)
+{
+ int err;
+ struct sdei_crosscall_args *arg = data;
+
+ err = sdei_api_event_disable(arg->event->event_num);
+
+ sdei_cross_call_return(arg, err);
+}
+
int sdei_event_disable(u32 event_num)
{
int err = -EINVAL;
@@ -376,6 +448,9 @@ int sdei_event_disable(u32 event_num)
err = -ENOENT;
else if (event->type == SDEI_EVENT_TYPE_SHARED)
err = sdei_api_event_disable(event->event_num);
+ else
+ err = sdei_do_cross_call(_ipi_event_disable, event);
+
spin_unlock(&sdei_events_lock);
return err;
@@ -388,6 +463,27 @@ static int sdei_api_event_unregister(u32 event_num)
0, 0, 0, NULL);
}
+static void _local_event_unregister(void *data)
+{
+ int err;
+ u64 result;
+ struct sdei_registered_event *reg;
+ struct sdei_crosscall_args *arg = data;
+
+ WARN_ON_ONCE(preemptible());
+ lockdep_assert_held(&sdei_events_lock);
+
+ reg = per_cpu_ptr(arg->event->private_registered, smp_processor_id());
+ err = sdei_api_event_status(reg->event_num, &result);
+ if (!err) {
+ reg->was_enabled = !!(result & BIT(SDEI_EVENT_STATUS_ENABLED));
+
+ err = sdei_api_event_unregister(arg->event->event_num);
+ }
+
+ sdei_cross_call_return(arg, err);
+}
+
static int _sdei_event_unregister(struct sdei_event *event)
{
int err;
@@ -408,7 +504,7 @@ static int _sdei_event_unregister(struct sdei_event *event)
return sdei_api_event_unregister(event->event_num);
}
- return -EINVAL;
+ return sdei_do_cross_call(_local_event_unregister, event);
}
int sdei_event_unregister(u32 event_num)
@@ -467,15 +563,41 @@ static int sdei_api_event_register(u32 event_num, unsigned long entry_point,
flags, affinity, NULL);
}
+static void _local_event_register(void *data)
+{
+ int err;
+ struct sdei_registered_event *reg;
+ struct sdei_crosscall_args *arg = data;
+
+ WARN_ON(preemptible());
+ lockdep_assert_held(&sdei_events_lock);
+
+ reg = per_cpu_ptr(arg->event->private_registered, smp_processor_id());
+ err = sdei_api_event_register(arg->event->event_num, sdei_entry_point,
+ reg, 0, 0);
+
+ sdei_cross_call_return(arg, err);
+}
+
static int _sdei_event_register(struct sdei_event *event)
{
+ int err;
+
if (event->type == SDEI_EVENT_TYPE_SHARED)
return sdei_api_event_register(event->event_num,
sdei_entry_point,
event->registered,
SDEI_EVENT_REGISTER_RM_ANY, 0);
- return -EINVAL;
+ get_online_cpus();
+
+ err = sdei_do_cross_call(_local_event_register, event);
+ if (err)
+ sdei_do_cross_call(_local_event_unregister, event);
+
+ put_online_cpus();
+
+ return err;
}
int sdei_event_register(u32 event_num, sdei_event_callback *cb, void *arg,
@@ -515,6 +637,22 @@ int sdei_event_register(u32 event_num, sdei_event_callback *cb, void *arg,
}
EXPORT_SYMBOL(sdei_event_register);
+static void _local_event_reenable(void *data)
+{
+ int err = 0;
+ struct sdei_registered_event *reg;
+ struct sdei_crosscall_args *arg = data;
+
+ WARN_ON_ONCE(preemptible());
+ lockdep_assert_held(&sdei_events_lock);
+
+ reg = per_cpu_ptr(arg->event->private_registered, smp_processor_id());
+ if (reg->was_enabled)
+ err = sdei_api_event_enable(arg->event->event_num);
+
+ sdei_cross_call_return(arg, err);
+}
+
static int sdei_reregister_event(struct sdei_event *event)
{
int err;
@@ -531,6 +669,8 @@ static int sdei_reregister_event(struct sdei_event *event)
if (event->type == SDEI_EVENT_TYPE_SHARED) {
if (event->registered->was_enabled)
err = sdei_api_event_enable(event->event_num);
+ } else {
+ err = sdei_do_cross_call(_local_event_reenable, event);
}
if (err)
@@ -555,6 +695,68 @@ static int sdei_reregister_events(void)
return err;
}
+static int sdei_cpuhp_down(unsigned int cpu)
+{
+ struct sdei_event *event;
+ struct sdei_crosscall_args arg;
+
+ if (frozen) {
+ /* All events unregistered */
+ return sdei_mask_local_cpu();
+ }
+
+ /* un-register private events */
+ spin_lock(&sdei_events_lock);
+ list_for_each_entry(event, &sdei_events, list) {
+ if (event->type == SDEI_EVENT_TYPE_SHARED)
+ continue;
+
+ CROSSCALL_INIT(arg, event);
+ /* call the cross-call function locally... */
+ _local_event_unregister(&arg);
+ if (arg.first_error)
+ pr_err("Failed to unregister event %u: %d\n",
+ event->event_num, arg.first_error);
+ }
+ spin_unlock(&sdei_events_lock);
+
+ return sdei_mask_local_cpu();
+}
+
+static int sdei_cpuhp_up(unsigned int cpu)
+{
+ struct sdei_event *event;
+ struct sdei_crosscall_args arg;
+
+ if (frozen) {
+ /* Events will be re-registered when we thaw. */
+ return sdei_unmask_local_cpu();
+ }
+
+ /* re-register/enable private events */
+ spin_lock(&sdei_events_lock);
+ list_for_each_entry(event, &sdei_events, list) {
+ if (event->type == SDEI_EVENT_TYPE_SHARED)
+ continue;
+
+ CROSSCALL_INIT(arg, event);
+ /* call the cross-call function locally... */
+ _local_event_register(&arg);
+ if (arg.first_error)
+ pr_err("Failed to re-register event %u: %d\n",
+ event->event_num, arg.first_error);
+
+ CROSSCALL_INIT(arg, event);
+ _local_event_reenable(&arg);
+ if (arg.first_error)
+ pr_err("Failed to re-enable event %u: %d\n",
+ event->event_num, arg.first_error);
+ }
+ spin_unlock(&sdei_events_lock);
+
+ return sdei_unmask_local_cpu();
+}
+
/* When entering idle, mask/unmask events for this cpu */
static int sdei_pm_notifier(struct notifier_block *nb, unsigned long action,
void *data)
@@ -607,6 +809,7 @@ static int sdei_device_freeze(struct device *dev)
{
int err;
+ frozen = true;
err = sdei_event_unregister_all();
if (err)
return err;
@@ -616,9 +819,13 @@ static int sdei_device_freeze(struct device *dev)
static int sdei_device_thaw(struct device *dev)
{
+ int err;
+
sdei_device_resume(dev);
- return sdei_reregister_events();
+ err = sdei_reregister_events();
+ frozen = false;
+ return err;
}
static int sdei_device_restore(struct device *dev)
@@ -650,6 +857,12 @@ static int sdei_reboot_notifier(struct notifier_block *nb, unsigned long action,
sdei_platform_reset();
+ /*
+ * There is now no point trying to unregister private events if we go on
+ * to take CPUs offline.
+ */
+ frozen = true;
+
return NOTIFY_OK;
}
--
2.13.3
^ permalink raw reply related [flat|nested] 38+ messages in thread
* [PATCH v2 11/11] KVM: arm64: Allow user-space to claim guest SMC-CC ranges for SDEI
2017-08-08 16:46 ` James Morse
@ 2017-08-08 16:46 ` James Morse
-1 siblings, 0 replies; 38+ messages in thread
From: James Morse @ 2017-08-08 16:46 UTC (permalink / raw)
To: linux-arm-kernel-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r
Cc: kvmarm-FPEHb7Xf0XXUo1n7N8X6UoWGPAHP3yOg,
devicetree-u79uwXL29TY76Z2rM5mHXA, Will Deacon, Catalin Marinas,
Marc Zyngier, Christoffer Dall, Mark Rutland, Rob Herring,
Loc Ho, James Morse
Instead of supporting SDEI in KVM, and providing a new API to
control and configure the in-kernel support, allow user-space to
request particular SMC-CC ranges from guest HVC calls to be handled
by user-space.
This requires two KVM capabilities, KVM_CAP_ARM_SDEI_1_0 advertises
that KVM knows how to match SDEI SMC-CC calls from a guest. To pass these
calls to user-space requires this cap to be enabled using
KVM_CAP_ENABLE_CAP_VM.
Calls are passed with exit_reason = KVM_EXIT_HYPERCALL, the kvm_run
structure has copies of the first 6 registers and the guest pstate.
Signed-off-by: James Morse <james.morse-5wv7dgnIgG8@public.gmane.org>
---
While I'm in here, remove KVM_CAP_ARM_SET_DEVICE_ADDR's extra entry
for r=1;break?
Changes from v1:
* all of it
Documentation/virtual/kvm/api.txt | 12 +++++++++---
arch/arm64/include/asm/kvm_host.h | 6 ++++++
arch/arm64/kvm/handle_exit.c | 28 +++++++++++++++++++++++++++-
include/uapi/linux/kvm.h | 1 +
virt/kvm/arm/arm.c | 29 +++++++++++++++++++++++++++--
5 files changed, 70 insertions(+), 6 deletions(-)
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index e63a35fafef0..740288d6e894 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -1012,7 +1012,7 @@ documentation when it pops into existence).
4.37 KVM_ENABLE_CAP
Capability: KVM_CAP_ENABLE_CAP, KVM_CAP_ENABLE_CAP_VM
-Architectures: x86 (only KVM_CAP_ENABLE_CAP_VM),
+Architectures: x86, arm, arm64 (only KVM_CAP_ENABLE_CAP_VM),
mips (only KVM_CAP_ENABLE_CAP), ppc, s390
Type: vcpu ioctl, vm ioctl (with KVM_CAP_ENABLE_CAP_VM)
Parameters: struct kvm_enable_cap (in)
@@ -3540,10 +3540,16 @@ pending operations.
__u32 pad;
} hypercall;
-Unused. This was once used for 'hypercall to userspace'. To implement
-such functionality, use KVM_EXIT_IO (x86) or KVM_EXIT_MMIO (all except s390).
+This was once used for 'hypercall to userspace'. To implement such
+functionality, use KVM_EXIT_IO (x86) or KVM_EXIT_MMIO (all except s390).
Note KVM_EXIT_IO is significantly faster than KVM_EXIT_MMIO.
+On arm64 this is used to complete guest hypercalls (HVC) in user space.
+e.g. Configuring SDEI or communicating with an emulated TEE.
+The required HVC ranges must first be enabled by KVM_CAP_ENABLE_CAP_VM.
+The 'args' array contains a copy of r0-r5 and 'longmode' contains a copy
+of the CPSR/PSTATE.
+
/* KVM_EXIT_TPR_ACCESS */
struct {
__u64 rip;
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 7733492d9a35..77b8436e745e 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -71,8 +71,14 @@ struct kvm_arch {
/* Interrupt controller */
struct vgic_dist vgic;
+
+ /* SMC-CC/HVC ranges user space has requested */
+ u32 hvc_passthrough_ranges;
};
+/* SMC-CC/HVC ranges that can be passed to user space */
+#define KVM_HVC_RANGE_SDEI 1
+
#define KVM_NR_MEM_OBJS 40
/*
diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
index 17d8a1677a0b..22eadf2cd82f 100644
--- a/arch/arm64/kvm/handle_exit.c
+++ b/arch/arm64/kvm/handle_exit.c
@@ -21,6 +21,7 @@
#include <linux/kvm.h>
#include <linux/kvm_host.h>
+#include <linux/sdei.h>
#include <asm/esr.h>
#include <asm/kvm_asm.h>
@@ -34,15 +35,40 @@
typedef int (*exit_handle_fn)(struct kvm_vcpu *, struct kvm_run *);
+#define HVC_PASSTHROUGH(kvm, range) (kvm->arch.hvc_passthrough_ranges & range)
+
+/*
+ * The guest made an SMC-CC call that user-space has claimed.
+ */
+static int populate_hypercall_exit(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+ int i;
+
+ run->exit_reason = KVM_EXIT_HYPERCALL;
+
+ for (i = 0 ; i < ARRAY_SIZE(run->hypercall.args); i++)
+ run->hypercall.args[i] = vcpu_get_reg(vcpu, i);
+
+ run->hypercall.longmode = *vcpu_cpsr(vcpu);
+
+ return 0;
+}
+
static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run)
{
int ret;
+ struct kvm *kvm = vcpu->kvm;
+ unsigned long x0 = vcpu_get_reg(vcpu, 0);
trace_kvm_hvc_arm64(*vcpu_pc(vcpu), vcpu_get_reg(vcpu, 0),
kvm_vcpu_hvc_get_imm(vcpu));
vcpu->stat.hvc_exit_stat++;
- ret = kvm_psci_call(vcpu);
+ if (IS_SDEI_CALL(x0) && HVC_PASSTHROUGH(kvm, KVM_HVC_RANGE_SDEI))
+ ret = populate_hypercall_exit(vcpu, run);
+ else
+ ret = kvm_psci_call(vcpu);
+
if (ret < 0) {
kvm_inject_undefined(vcpu);
return 1;
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 6cd63c18708a..8e6bc6ba918d 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -929,6 +929,7 @@ struct kvm_ppc_resize_hpt {
#define KVM_CAP_PPC_SMT_POSSIBLE 147
#define KVM_CAP_HYPERV_SYNIC2 148
#define KVM_CAP_HYPERV_VP_INDEX 149
+#define KVM_CAP_ARM_SDEI_1_0 150
#ifdef KVM_CAP_IRQ_ROUTING
diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
index e9765ee6d769..b8657b68fea7 100644
--- a/virt/kvm/arm/arm.c
+++ b/virt/kvm/arm/arm.c
@@ -206,8 +206,10 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_READONLY_MEM:
case KVM_CAP_MP_STATE:
case KVM_CAP_IMMEDIATE_EXIT:
- r = 1;
- break;
+ case KVM_CAP_ENABLE_CAP_VM:
+#ifdef CONFIG_ARM64
+ case KVM_CAP_ARM_SDEI_1_0:
+#endif
case KVM_CAP_ARM_SET_DEVICE_ADDR:
r = 1;
break;
@@ -1082,6 +1084,21 @@ static int kvm_vm_ioctl_set_device_addr(struct kvm *kvm,
}
}
+static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *req)
+{
+ int err = -EINVAL;
+
+ if (req->flags)
+ return err;
+
+ if (IS_ENABLED(CONFIG_ARM64) && req->cap == KVM_CAP_ARM_SDEI_1_0) {
+ kvm->arch.hvc_passthrough_ranges |= KVM_HVC_RANGE_SDEI;
+ err = 0;
+ }
+
+ return err;
+}
+
long kvm_arch_vm_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
{
@@ -1118,6 +1135,14 @@ long kvm_arch_vm_ioctl(struct file *filp,
return 0;
}
+ case KVM_ENABLE_CAP: {
+ struct kvm_enable_cap req;
+
+ if (copy_from_user(&req, argp, sizeof(req)))
+ return -EFAULT;
+
+ return kvm_vm_ioctl_enable_cap(kvm, &req);
+ }
default:
return -EINVAL;
}
--
2.13.3
--
To unsubscribe from this list: send the line "unsubscribe devicetree" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply related [flat|nested] 38+ messages in thread
* [PATCH v2 11/11] KVM: arm64: Allow user-space to claim guest SMC-CC ranges for SDEI
@ 2017-08-08 16:46 ` James Morse
0 siblings, 0 replies; 38+ messages in thread
From: James Morse @ 2017-08-08 16:46 UTC (permalink / raw)
To: linux-arm-kernel
Instead of supporting SDEI in KVM, and providing a new API to
control and configure the in-kernel support, allow user-space to
request particular SMC-CC ranges from guest HVC calls to be handled
by user-space.
This requires two KVM capabilities, KVM_CAP_ARM_SDEI_1_0 advertises
that KVM knows how to match SDEI SMC-CC calls from a guest. To pass these
calls to user-space requires this cap to be enabled using
KVM_CAP_ENABLE_CAP_VM.
Calls are passed with exit_reason = KVM_EXIT_HYPERCALL, the kvm_run
structure has copies of the first 6 registers and the guest pstate.
Signed-off-by: James Morse <james.morse@arm.com>
---
While I'm in here, remove KVM_CAP_ARM_SET_DEVICE_ADDR's extra entry
for r=1;break?
Changes from v1:
* all of it
Documentation/virtual/kvm/api.txt | 12 +++++++++---
arch/arm64/include/asm/kvm_host.h | 6 ++++++
arch/arm64/kvm/handle_exit.c | 28 +++++++++++++++++++++++++++-
include/uapi/linux/kvm.h | 1 +
virt/kvm/arm/arm.c | 29 +++++++++++++++++++++++++++--
5 files changed, 70 insertions(+), 6 deletions(-)
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index e63a35fafef0..740288d6e894 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -1012,7 +1012,7 @@ documentation when it pops into existence).
4.37 KVM_ENABLE_CAP
Capability: KVM_CAP_ENABLE_CAP, KVM_CAP_ENABLE_CAP_VM
-Architectures: x86 (only KVM_CAP_ENABLE_CAP_VM),
+Architectures: x86, arm, arm64 (only KVM_CAP_ENABLE_CAP_VM),
mips (only KVM_CAP_ENABLE_CAP), ppc, s390
Type: vcpu ioctl, vm ioctl (with KVM_CAP_ENABLE_CAP_VM)
Parameters: struct kvm_enable_cap (in)
@@ -3540,10 +3540,16 @@ pending operations.
__u32 pad;
} hypercall;
-Unused. This was once used for 'hypercall to userspace'. To implement
-such functionality, use KVM_EXIT_IO (x86) or KVM_EXIT_MMIO (all except s390).
+This was once used for 'hypercall to userspace'. To implement such
+functionality, use KVM_EXIT_IO (x86) or KVM_EXIT_MMIO (all except s390).
Note KVM_EXIT_IO is significantly faster than KVM_EXIT_MMIO.
+On arm64 this is used to complete guest hypercalls (HVC) in user space.
+e.g. Configuring SDEI or communicating with an emulated TEE.
+The required HVC ranges must first be enabled by KVM_CAP_ENABLE_CAP_VM.
+The 'args' array contains a copy of r0-r5 and 'longmode' contains a copy
+of the CPSR/PSTATE.
+
/* KVM_EXIT_TPR_ACCESS */
struct {
__u64 rip;
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 7733492d9a35..77b8436e745e 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -71,8 +71,14 @@ struct kvm_arch {
/* Interrupt controller */
struct vgic_dist vgic;
+
+ /* SMC-CC/HVC ranges user space has requested */
+ u32 hvc_passthrough_ranges;
};
+/* SMC-CC/HVC ranges that can be passed to user space */
+#define KVM_HVC_RANGE_SDEI 1
+
#define KVM_NR_MEM_OBJS 40
/*
diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
index 17d8a1677a0b..22eadf2cd82f 100644
--- a/arch/arm64/kvm/handle_exit.c
+++ b/arch/arm64/kvm/handle_exit.c
@@ -21,6 +21,7 @@
#include <linux/kvm.h>
#include <linux/kvm_host.h>
+#include <linux/sdei.h>
#include <asm/esr.h>
#include <asm/kvm_asm.h>
@@ -34,15 +35,40 @@
typedef int (*exit_handle_fn)(struct kvm_vcpu *, struct kvm_run *);
+#define HVC_PASSTHROUGH(kvm, range) (kvm->arch.hvc_passthrough_ranges & range)
+
+/*
+ * The guest made an SMC-CC call that user-space has claimed.
+ */
+static int populate_hypercall_exit(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+ int i;
+
+ run->exit_reason = KVM_EXIT_HYPERCALL;
+
+ for (i = 0 ; i < ARRAY_SIZE(run->hypercall.args); i++)
+ run->hypercall.args[i] = vcpu_get_reg(vcpu, i);
+
+ run->hypercall.longmode = *vcpu_cpsr(vcpu);
+
+ return 0;
+}
+
static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run)
{
int ret;
+ struct kvm *kvm = vcpu->kvm;
+ unsigned long x0 = vcpu_get_reg(vcpu, 0);
trace_kvm_hvc_arm64(*vcpu_pc(vcpu), vcpu_get_reg(vcpu, 0),
kvm_vcpu_hvc_get_imm(vcpu));
vcpu->stat.hvc_exit_stat++;
- ret = kvm_psci_call(vcpu);
+ if (IS_SDEI_CALL(x0) && HVC_PASSTHROUGH(kvm, KVM_HVC_RANGE_SDEI))
+ ret = populate_hypercall_exit(vcpu, run);
+ else
+ ret = kvm_psci_call(vcpu);
+
if (ret < 0) {
kvm_inject_undefined(vcpu);
return 1;
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 6cd63c18708a..8e6bc6ba918d 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -929,6 +929,7 @@ struct kvm_ppc_resize_hpt {
#define KVM_CAP_PPC_SMT_POSSIBLE 147
#define KVM_CAP_HYPERV_SYNIC2 148
#define KVM_CAP_HYPERV_VP_INDEX 149
+#define KVM_CAP_ARM_SDEI_1_0 150
#ifdef KVM_CAP_IRQ_ROUTING
diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
index e9765ee6d769..b8657b68fea7 100644
--- a/virt/kvm/arm/arm.c
+++ b/virt/kvm/arm/arm.c
@@ -206,8 +206,10 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_READONLY_MEM:
case KVM_CAP_MP_STATE:
case KVM_CAP_IMMEDIATE_EXIT:
- r = 1;
- break;
+ case KVM_CAP_ENABLE_CAP_VM:
+#ifdef CONFIG_ARM64
+ case KVM_CAP_ARM_SDEI_1_0:
+#endif
case KVM_CAP_ARM_SET_DEVICE_ADDR:
r = 1;
break;
@@ -1082,6 +1084,21 @@ static int kvm_vm_ioctl_set_device_addr(struct kvm *kvm,
}
}
+static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *req)
+{
+ int err = -EINVAL;
+
+ if (req->flags)
+ return err;
+
+ if (IS_ENABLED(CONFIG_ARM64) && req->cap == KVM_CAP_ARM_SDEI_1_0) {
+ kvm->arch.hvc_passthrough_ranges |= KVM_HVC_RANGE_SDEI;
+ err = 0;
+ }
+
+ return err;
+}
+
long kvm_arch_vm_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
{
@@ -1118,6 +1135,14 @@ long kvm_arch_vm_ioctl(struct file *filp,
return 0;
}
+ case KVM_ENABLE_CAP: {
+ struct kvm_enable_cap req;
+
+ if (copy_from_user(&req, argp, sizeof(req)))
+ return -EFAULT;
+
+ return kvm_vm_ioctl_enable_cap(kvm, &req);
+ }
default:
return -EINVAL;
}
--
2.13.3
^ permalink raw reply related [flat|nested] 38+ messages in thread
[parent not found: <20170808164616.25949-12-james.morse-5wv7dgnIgG8@public.gmane.org>]
* Re: [PATCH v2 11/11] KVM: arm64: Allow user-space to claim guest SMC-CC ranges for SDEI
2017-08-08 16:46 ` James Morse
@ 2017-09-17 14:43 ` Christoffer Dall
-1 siblings, 0 replies; 38+ messages in thread
From: Christoffer Dall @ 2017-09-17 14:43 UTC (permalink / raw)
To: James Morse
Cc: linux-arm-kernel-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r,
kvmarm-FPEHb7Xf0XXUo1n7N8X6UoWGPAHP3yOg,
devicetree-u79uwXL29TY76Z2rM5mHXA, Will Deacon, Catalin Marinas,
Marc Zyngier, Christoffer Dall, Mark Rutland, Rob Herring,
Loc Ho
On Tue, Aug 08, 2017 at 05:46:16PM +0100, James Morse wrote:
> Instead of supporting SDEI in KVM, and providing a new API to
> control and configure the in-kernel support, allow user-space to
> request particular SMC-CC ranges from guest HVC calls to be handled
> by user-space.
>
> This requires two KVM capabilities, KVM_CAP_ARM_SDEI_1_0 advertises
> that KVM knows how match SDEI SMC-CC calls from a guest. To pass these
> calls to user-space requires this cap to be enabled using
> KVM_CAP_ENABLE_CAP_VM.
>
> Calls are passed with exit_reason = KVM_EXIT_HYPERCALL, the kvm_run
> structure has copies of the first 6 registers and the guest pstate.
>
> Signed-off-by: James Morse <james.morse-5wv7dgnIgG8@public.gmane.org>
> ---
> While I'm in here, remove KVM_CAP_ARM_SET_DEVICE_ADDR's extra entry
> for r=1;break?
>
> Changes from v1:
> * all of it
>
> Documentation/virtual/kvm/api.txt | 12 +++++++++---
> arch/arm64/include/asm/kvm_host.h | 6 ++++++
> arch/arm64/kvm/handle_exit.c | 28 +++++++++++++++++++++++++++-
> include/uapi/linux/kvm.h | 1 +
> virt/kvm/arm/arm.c | 29 +++++++++++++++++++++++++++--
> 5 files changed, 70 insertions(+), 6 deletions(-)
>
> diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
> index e63a35fafef0..740288d6e894 100644
> --- a/Documentation/virtual/kvm/api.txt
> +++ b/Documentation/virtual/kvm/api.txt
> @@ -1012,7 +1012,7 @@ documentation when it pops into existence).
> 4.37 KVM_ENABLE_CAP
>
> Capability: KVM_CAP_ENABLE_CAP, KVM_CAP_ENABLE_CAP_VM
> -Architectures: x86 (only KVM_CAP_ENABLE_CAP_VM),
> +Architectures: x86, arm, arm64 (only KVM_CAP_ENABLE_CAP_VM),
> mips (only KVM_CAP_ENABLE_CAP), ppc, s390
> Type: vcpu ioctl, vm ioctl (with KVM_CAP_ENABLE_CAP_VM)
> Parameters: struct kvm_enable_cap (in)
> @@ -3540,10 +3540,16 @@ pending operations.
> __u32 pad;
> } hypercall;
>
> -Unused. This was once used for 'hypercall to userspace'. To implement
> -such functionality, use KVM_EXIT_IO (x86) or KVM_EXIT_MMIO (all except s390).
> +This was once used for 'hypercall to userspace'. To implement such
> +functionality, use KVM_EXIT_IO (x86) or KVM_EXIT_MMIO (all except s390).
I suggest you add 'for x86' to these two sentences, otherwise the
following paragraph seems contradicting.
> Note KVM_EXIT_IO is significantly faster than KVM_EXIT_MMIO.
>
> +On arm64 this is used to complete guest hypercalls (HVC) in user space.
> +e.g. Configuring SDEI or communicating with an emulated TEE.
s/e.g./For example,/
> +The required HVC ranges must first be enabled by KVM_CAP_ENABLE_CAP_VM.
> +The 'args' array contains a copy of r0-r5 and 'longmode' contains a copy
> +of the CPSR/PSTATE.
> +
I'm not entirely sure what this means by just looking at this text. Do
you mean that some HVC (and SMC?) calls can still be handled by the
kernel directly (PSCI), and some can be configured to be handled by the
kernel? Where does it specify how these 'ranges' are defined?
What about the immediate field, could userspace not also need to know
this?
> /* KVM_EXIT_TPR_ACCESS */
> struct {
> __u64 rip;
> diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
> index 7733492d9a35..77b8436e745e 100644
> --- a/arch/arm64/include/asm/kvm_host.h
> +++ b/arch/arm64/include/asm/kvm_host.h
> @@ -71,8 +71,14 @@ struct kvm_arch {
>
> /* Interrupt controller */
> struct vgic_dist vgic;
> +
> + /* SMC-CC/HVC ranges user space has requested */
> + u32 hvc_passthrough_ranges;
> };
>
> +/* SMC-CC/HVC ranges that can be passed to user space */
> +#define KVM_HVC_RANGE_SDEI 1
Oh, I see you want the kernel to know what constitutes a range for some
functionality and set things up that way.
What are your thoughts on letting userspace configure a range of from/to
values in x0 for some value/range of the immediate field?
> +
> #define KVM_NR_MEM_OBJS 40
>
> /*
> diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
> index 17d8a1677a0b..22eadf2cd82f 100644
> --- a/arch/arm64/kvm/handle_exit.c
> +++ b/arch/arm64/kvm/handle_exit.c
> @@ -21,6 +21,7 @@
>
> #include <linux/kvm.h>
> #include <linux/kvm_host.h>
> +#include <linux/sdei.h>
>
> #include <asm/esr.h>
> #include <asm/kvm_asm.h>
> @@ -34,15 +35,40 @@
>
> typedef int (*exit_handle_fn)(struct kvm_vcpu *, struct kvm_run *);
>
> +#define HVC_PASSTHROUGH(kvm, range) (kvm->arch.hvc_passthrough_ranges & range)
I think this should have parenthesis around (range) ?
Are we not going to support this for SMC passthrough? (for nested virt
for example).
> +
> +/*
> + * The guest made an SMC-CC call that user-space has claimed.
> + */
> +static int populate_hypercall_exit(struct kvm_vcpu *vcpu, struct kvm_run *run)
> +{
> + int i;
> +
> + run->exit_reason = KVM_EXIT_HYPERCALL;
> +
> + for (i = 0 ; i < ARRAY_SIZE(run->hypercall.args); i++)
> + run->hypercall.args[i] = vcpu_get_reg(vcpu, i);
> +
> + run->hypercall.longmode = *vcpu_cpsr(vcpu);
> +
> + return 0;
> +}
> +
> static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run)
> {
> int ret;
> + struct kvm *kvm = vcpu->kvm;
> + unsigned long x0 = vcpu_get_reg(vcpu, 0);
>
> trace_kvm_hvc_arm64(*vcpu_pc(vcpu), vcpu_get_reg(vcpu, 0),
> kvm_vcpu_hvc_get_imm(vcpu));
> vcpu->stat.hvc_exit_stat++;
>
> - ret = kvm_psci_call(vcpu);
> + if (IS_SDEI_CALL(x0) && HVC_PASSTHROUGH(kvm, KVM_HVC_RANGE_SDEI))
> + ret = populate_hypercall_exit(vcpu, run);
> + else
> + ret = kvm_psci_call(vcpu);
> +
> if (ret < 0) {
> kvm_inject_undefined(vcpu);
> return 1;
> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> index 6cd63c18708a..8e6bc6ba918d 100644
> --- a/include/uapi/linux/kvm.h
> +++ b/include/uapi/linux/kvm.h
> @@ -929,6 +929,7 @@ struct kvm_ppc_resize_hpt {
> #define KVM_CAP_PPC_SMT_POSSIBLE 147
> #define KVM_CAP_HYPERV_SYNIC2 148
> #define KVM_CAP_HYPERV_VP_INDEX 149
> +#define KVM_CAP_ARM_SDEI_1_0 150
>
> #ifdef KVM_CAP_IRQ_ROUTING
>
> diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
> index e9765ee6d769..b8657b68fea7 100644
> --- a/virt/kvm/arm/arm.c
> +++ b/virt/kvm/arm/arm.c
> @@ -206,8 +206,10 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
> case KVM_CAP_READONLY_MEM:
> case KVM_CAP_MP_STATE:
> case KVM_CAP_IMMEDIATE_EXIT:
> - r = 1;
> - break;
> + case KVM_CAP_ENABLE_CAP_VM:
> +#ifdef CONFIG_ARM64
> + case KVM_CAP_ARM_SDEI_1_0:
> +#endif
> case KVM_CAP_ARM_SET_DEVICE_ADDR:
> r = 1;
> break;
> @@ -1082,6 +1084,21 @@ static int kvm_vm_ioctl_set_device_addr(struct kvm *kvm,
> }
> }
>
> +static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *req)
> +{
> + int err = -EINVAL;
> +
> + if (req->flags)
> + return err;
> +
> + if (IS_ENABLED(CONFIG_ARM64) && req->cap == KVM_CAP_ARM_SDEI_1_0) {
> + kvm->arch.hvc_passthrough_ranges |= KVM_HVC_RANGE_SDEI;
> + err = 0;
> + }
> +
> + return err;
> +}
> +
> long kvm_arch_vm_ioctl(struct file *filp,
> unsigned int ioctl, unsigned long arg)
> {
> @@ -1118,6 +1135,14 @@ long kvm_arch_vm_ioctl(struct file *filp,
>
> return 0;
> }
> + case KVM_ENABLE_CAP: {
> + struct kvm_enable_cap req;
> +
> + if (copy_from_user(&req, argp, sizeof(req)))
> + return -EFAULT;
> +
> + return kvm_vm_ioctl_enable_cap(kvm, &req);
> + }
> default:
> return -EINVAL;
> }
> --
> 2.13.3
>
Thanks,
-Christoffer
--
To unsubscribe from this list: send the line "unsubscribe devicetree" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply [flat|nested] 38+ messages in thread
* [PATCH v2 11/11] KVM: arm64: Allow user-space to claim guest SMC-CC ranges for SDEI
@ 2017-09-17 14:43 ` Christoffer Dall
0 siblings, 0 replies; 38+ messages in thread
From: Christoffer Dall @ 2017-09-17 14:43 UTC (permalink / raw)
To: linux-arm-kernel
On Tue, Aug 08, 2017 at 05:46:16PM +0100, James Morse wrote:
> Instead of supporting SDEI in KVM, and providing a new API to
> control and configure the in-kernel support, allow user-space to
> request particular SMC-CC ranges from guest HVC calls to be handled
> by user-space.
>
> This requires two KVM capabilities, KVM_CAP_ARM_SDEI_1_0 advertises
> that KVM knows how to match SDEI SMC-CC calls from a guest. To pass these
> calls to user-space requires this cap to be enabled using
> KVM_CAP_ENABLE_CAP_VM.
>
> Calls are passed with exit_reason = KVM_EXIT_HYPERCALL, the kvm_run
> structure has copies of the first 6 registers and the guest pstate.
>
> Signed-off-by: James Morse <james.morse@arm.com>
> ---
> While I'm in here, remove KVM_CAP_ARM_SET_DEVICE_ADDR's extra entry
> for r=1;break?
>
> Changes from v1:
> * all of it
>
> Documentation/virtual/kvm/api.txt | 12 +++++++++---
> arch/arm64/include/asm/kvm_host.h | 6 ++++++
> arch/arm64/kvm/handle_exit.c | 28 +++++++++++++++++++++++++++-
> include/uapi/linux/kvm.h | 1 +
> virt/kvm/arm/arm.c | 29 +++++++++++++++++++++++++++--
> 5 files changed, 70 insertions(+), 6 deletions(-)
>
> diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
> index e63a35fafef0..740288d6e894 100644
> --- a/Documentation/virtual/kvm/api.txt
> +++ b/Documentation/virtual/kvm/api.txt
> @@ -1012,7 +1012,7 @@ documentation when it pops into existence).
> 4.37 KVM_ENABLE_CAP
>
> Capability: KVM_CAP_ENABLE_CAP, KVM_CAP_ENABLE_CAP_VM
> -Architectures: x86 (only KVM_CAP_ENABLE_CAP_VM),
> +Architectures: x86, arm, arm64 (only KVM_CAP_ENABLE_CAP_VM),
> mips (only KVM_CAP_ENABLE_CAP), ppc, s390
> Type: vcpu ioctl, vm ioctl (with KVM_CAP_ENABLE_CAP_VM)
> Parameters: struct kvm_enable_cap (in)
> @@ -3540,10 +3540,16 @@ pending operations.
> __u32 pad;
> } hypercall;
>
> -Unused. This was once used for 'hypercall to userspace'. To implement
> -such functionality, use KVM_EXIT_IO (x86) or KVM_EXIT_MMIO (all except s390).
> +This was once used for 'hypercall to userspace'. To implement such
> +functionality, use KVM_EXIT_IO (x86) or KVM_EXIT_MMIO (all except s390).
I suggest you add 'for x86' to these two sentences, otherwise the
following paragraph seems contradicting.
> Note KVM_EXIT_IO is significantly faster than KVM_EXIT_MMIO.
>
> +On arm64 this is used to complete guest hypercalls (HVC) in user space.
> +e.g. Configuring SDEI or communicating with an emulated TEE.
s/e.g./For example,/
> +The required HVC ranges must first be enabled by KVM_CAP_ENABLE_CAP_VM.
> +The 'args' array contains a copy of r0-r5 and 'longmode' contains a copy
> +of the CPSR/PSTATE.
> +
I'm not entirely sure what this means by just looking at this text. Do
you mean that some HVC (and SMC?) calls can still be handled by the
kernel directly (PSCI), and some can be configured to be handled by the
user space? Where does it specify how these 'ranges' are defined?
What about the immediate field, could userspace not also need to know
this?
> /* KVM_EXIT_TPR_ACCESS */
> struct {
> __u64 rip;
> diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
> index 7733492d9a35..77b8436e745e 100644
> --- a/arch/arm64/include/asm/kvm_host.h
> +++ b/arch/arm64/include/asm/kvm_host.h
> @@ -71,8 +71,14 @@ struct kvm_arch {
>
> /* Interrupt controller */
> struct vgic_dist vgic;
> +
> + /* SMC-CC/HVC ranges user space has requested */
> + u32 hvc_passthrough_ranges;
> };
>
> +/* SMC-CC/HVC ranges that can be passed to user space */
> +#define KVM_HVC_RANGE_SDEI 1
Oh, I see you want the kernel to know what constitutes a range for some
functionality and set things up that way.
What are your thoughts on letting userspace configure a range of from/to
values in x0 for some value/range of the immediate field?
> +
> #define KVM_NR_MEM_OBJS 40
>
> /*
> diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
> index 17d8a1677a0b..22eadf2cd82f 100644
> --- a/arch/arm64/kvm/handle_exit.c
> +++ b/arch/arm64/kvm/handle_exit.c
> @@ -21,6 +21,7 @@
>
> #include <linux/kvm.h>
> #include <linux/kvm_host.h>
> +#include <linux/sdei.h>
>
> #include <asm/esr.h>
> #include <asm/kvm_asm.h>
> @@ -34,15 +35,40 @@
>
> typedef int (*exit_handle_fn)(struct kvm_vcpu *, struct kvm_run *);
>
> +#define HVC_PASSTHROUGH(kvm, range) (kvm->arch.hvc_passthrough_ranges & range)
I think this should have parenthesis around (range) ?
Are we not going to support this for SMC passthrough? (for nested virt
for example).
> +
> +/*
> + * The guest made an SMC-CC call that user-space has claimed.
> + */
> +static int populate_hypercall_exit(struct kvm_vcpu *vcpu, struct kvm_run *run)
> +{
> + int i;
> +
> + run->exit_reason = KVM_EXIT_HYPERCALL;
> +
> + for (i = 0 ; i < ARRAY_SIZE(run->hypercall.args); i++)
> + run->hypercall.args[i] = vcpu_get_reg(vcpu, i);
> +
> + run->hypercall.longmode = *vcpu_cpsr(vcpu);
> +
> + return 0;
> +}
> +
> static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run)
> {
> int ret;
> + struct kvm *kvm = vcpu->kvm;
> + unsigned long x0 = vcpu_get_reg(vcpu, 0);
>
> trace_kvm_hvc_arm64(*vcpu_pc(vcpu), vcpu_get_reg(vcpu, 0),
> kvm_vcpu_hvc_get_imm(vcpu));
> vcpu->stat.hvc_exit_stat++;
>
> - ret = kvm_psci_call(vcpu);
> + if (IS_SDEI_CALL(x0) && HVC_PASSTHROUGH(kvm, KVM_HVC_RANGE_SDEI))
> + ret = populate_hypercall_exit(vcpu, run);
> + else
> + ret = kvm_psci_call(vcpu);
> +
> if (ret < 0) {
> kvm_inject_undefined(vcpu);
> return 1;
> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> index 6cd63c18708a..8e6bc6ba918d 100644
> --- a/include/uapi/linux/kvm.h
> +++ b/include/uapi/linux/kvm.h
> @@ -929,6 +929,7 @@ struct kvm_ppc_resize_hpt {
> #define KVM_CAP_PPC_SMT_POSSIBLE 147
> #define KVM_CAP_HYPERV_SYNIC2 148
> #define KVM_CAP_HYPERV_VP_INDEX 149
> +#define KVM_CAP_ARM_SDEI_1_0 150
>
> #ifdef KVM_CAP_IRQ_ROUTING
>
> diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
> index e9765ee6d769..b8657b68fea7 100644
> --- a/virt/kvm/arm/arm.c
> +++ b/virt/kvm/arm/arm.c
> @@ -206,8 +206,10 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
> case KVM_CAP_READONLY_MEM:
> case KVM_CAP_MP_STATE:
> case KVM_CAP_IMMEDIATE_EXIT:
> - r = 1;
> - break;
> + case KVM_CAP_ENABLE_CAP_VM:
> +#ifdef CONFIG_ARM64
> + case KVM_CAP_ARM_SDEI_1_0:
> +#endif
> case KVM_CAP_ARM_SET_DEVICE_ADDR:
> r = 1;
> break;
> @@ -1082,6 +1084,21 @@ static int kvm_vm_ioctl_set_device_addr(struct kvm *kvm,
> }
> }
>
> +static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *req)
> +{
> + int err = -EINVAL;
> +
> + if (req->flags)
> + return err;
> +
> + if (IS_ENABLED(CONFIG_ARM64) && req->cap == KVM_CAP_ARM_SDEI_1_0) {
> + kvm->arch.hvc_passthrough_ranges |= KVM_HVC_RANGE_SDEI;
> + err = 0;
> + }
> +
> + return err;
> +}
> +
> long kvm_arch_vm_ioctl(struct file *filp,
> unsigned int ioctl, unsigned long arg)
> {
> @@ -1118,6 +1135,14 @@ long kvm_arch_vm_ioctl(struct file *filp,
>
> return 0;
> }
> + case KVM_ENABLE_CAP: {
> + struct kvm_enable_cap req;
> +
> + if (copy_from_user(&req, argp, sizeof(req)))
> + return -EFAULT;
> +
> + return kvm_vm_ioctl_enable_cap(kvm, &req);
> + }
> default:
> return -EINVAL;
> }
> --
> 2.13.3
>
Thanks,
-Christoffer
^ permalink raw reply [flat|nested] 38+ messages in thread
* Re: [PATCH v2 11/11] KVM: arm64: Allow user-space to claim guest SMC-CC ranges for SDEI
2017-09-17 14:43 ` Christoffer Dall
@ 2017-09-19 15:37 ` James Morse
-1 siblings, 0 replies; 38+ messages in thread
From: James Morse @ 2017-09-19 15:37 UTC (permalink / raw)
To: Christoffer Dall
Cc: linux-arm-kernel-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r,
kvmarm-FPEHb7Xf0XXUo1n7N8X6UoWGPAHP3yOg,
devicetree-u79uwXL29TY76Z2rM5mHXA, Will Deacon, Catalin Marinas,
Marc Zyngier, Christoffer Dall, Mark Rutland, Rob Herring,
Loc Ho
Hi Christoffer,
On 17/09/17 15:43, Christoffer Dall wrote:
> On Tue, Aug 08, 2017 at 05:46:16PM +0100, James Morse wrote:
>> Instead of supporting SDEI in KVM, and providing a new API to
>> control and configure the in-kernel support, allow user-space to
>> request particular SMC-CC ranges from guest HVC calls to be handled
>> by user-space.
>>
>> This requires two KVM capabilities, KVM_CAP_ARM_SDEI_1_0 advertises
>> that KVM knows how to match SDEI SMC-CC calls from a guest. To pass these
>> calls to user-space requires this cap to be enabled using
>> KVM_CAP_ENABLE_CAP_VM.
>>
>> Calls are passed with exit_reason = KVM_EXIT_HYPERCALL, the kvm_run
>> structure has copies of the first 6 registers and the guest pstate.
>> diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
>> index e63a35fafef0..740288d6e894 100644
>> --- a/Documentation/virtual/kvm/api.txt
>> +++ b/Documentation/virtual/kvm/api.txt
>> @@ -1012,7 +1012,7 @@ documentation when it pops into existence).
>> 4.37 KVM_ENABLE_CAP
>>
>> Capability: KVM_CAP_ENABLE_CAP, KVM_CAP_ENABLE_CAP_VM
>> -Architectures: x86 (only KVM_CAP_ENABLE_CAP_VM),
>> +Architectures: x86, arm, arm64 (only KVM_CAP_ENABLE_CAP_VM),
>> mips (only KVM_CAP_ENABLE_CAP), ppc, s390
>> Type: vcpu ioctl, vm ioctl (with KVM_CAP_ENABLE_CAP_VM)
>> Parameters: struct kvm_enable_cap (in)
>> @@ -3540,10 +3540,16 @@ pending operations.
>> __u32 pad;
>> } hypercall;
>>
>> -Unused. This was once used for 'hypercall to userspace'. To implement
>> -such functionality, use KVM_EXIT_IO (x86) or KVM_EXIT_MMIO (all except s390).
>> +This was once used for 'hypercall to userspace'. To implement such
>> +functionality, use KVM_EXIT_IO (x86) or KVM_EXIT_MMIO (all except s390).
> I suggest you add 'for x86' to these two sentences, otherwise the
> following paragraph seems contradicting.
Does the second sentence apply to just x86? 'KVM_EXIT_MMIO (all except s390)'
I've changed this to:
> This was once used for 'hypercall to userspace' on x86. To implement such
> functionality, use KVM_EXIT_IO (x86) or KVM_EXIT_MMIO (all except s390).
>> Note KVM_EXIT_IO is significantly faster than KVM_EXIT_MMIO.
>>
>> +On arm64 this is used to complete guest hypercalls (HVC) in user space.
>> +e.g. Configuring SDEI or communicating with an emulated TEE.
>
> s/e.g./For example,/
>
>> +The required HVC ranges must first be enabled by KVM_CAP_ENABLE_CAP_VM.
>> +The 'args' array contains a copy of r0-r5 and 'longmode' contains a copy
>> +of the CPSR/PSTATE.
> I'm not entirely sure what this means by just looking at this text. Do
> you mean that some HVC (and SMC?) calls can still be handled by the
> kernel directly (PSCI), and some can be configured to be handled by the
> kernel? Where does it specify how these 'ranges' are defined?
I'm looking at this as just ARM's SMC-CC services, (which looks like too narrow
a view). I had expected the kernel to know which ranges it's willing to let
user-space drive, once they have a KVM_HVC_RANGE_ value userspace can try and
claim them.
This fits nicely with the kernel emulating PSCI by default, but looks wrong once
SMC and the immediates creep in too.
> What about the immediate field, could userspace not also need to know
> this?
Yes, I'd ignored that due to the narrow focus...
>> /* KVM_EXIT_TPR_ACCESS */
>> struct {
>> __u64 rip;
>> diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
>> index 7733492d9a35..77b8436e745e 100644
>> --- a/arch/arm64/include/asm/kvm_host.h
>> +++ b/arch/arm64/include/asm/kvm_host.h
>> @@ -71,8 +71,14 @@ struct kvm_arch {
>>
>> /* Interrupt controller */
>> struct vgic_dist vgic;
>> +
>> + /* SMC-CC/HVC ranges user space has requested */
>> + u32 hvc_passthrough_ranges;
>> };
>>
>> +/* SMC-CC/HVC ranges that can be passed to user space */
>> +#define KVM_HVC_RANGE_SDEI 1
> Oh, I see you want the kernel to know what constitutes a range for some
> functionality and set things up that way.
> What are your thoughts on letting userspace configure a range of from/to
> values in x0 for some value/range of the immediate field?
The only user we have today is PSCI. If we let userspace specify the x0 ranges
it may try and claim PSCI, or worse only part of the range the kernel uses for
its PSCI emulation. I'd like to avoid these corners.
The logic was the kernel has PSCI emulation so it already knows about particular
ranges, hence the grouping so that only whole ranges can be claimed...
.. but this doesn't fit with exposing the immediate values or SMC/HVC, both of
which have a bigger scope than SMC-CC.
Looking again at Marc's comments on this[0, 1], we could consider an 'all or
nothing' approach. If user-space wants the SDEI or any other range it has to
claim the lot, and emulate PSCI itself. This is more work for userspace in the
short term, but it's less work for userspace and the kernel in the long run.
(I assume Qemu already has a PSCI implementation for its TCG mode..)
>> +
>> #define KVM_NR_MEM_OBJS 40
>>
>> /*
>> diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
>> index 17d8a1677a0b..22eadf2cd82f 100644
>> --- a/arch/arm64/kvm/handle_exit.c
>> +++ b/arch/arm64/kvm/handle_exit.c
>> @@ -21,6 +21,7 @@
>>
>> #include <linux/kvm.h>
>> #include <linux/kvm_host.h>
>> +#include <linux/sdei.h>
>>
>> #include <asm/esr.h>
>> #include <asm/kvm_asm.h>
>> @@ -34,15 +35,40 @@
>>
>> typedef int (*exit_handle_fn)(struct kvm_vcpu *, struct kvm_run *);
>>
>> +#define HVC_PASSTHROUGH(kvm, range) (kvm->arch.hvc_passthrough_ranges & range)
> I think this should have parenthesis around (range) ?
>
> Are we not going to support this for SMC passthrough? (for nested virt
> for example).
Ah, I hadn't thought about that, (and I don't know much about how it works).
I assume user-space needs to know which of SMC/HVC the guest made, to route to
the appropriate guest{n}-hypervisor, and these probably need enabling
independently as SMC isn't guaranteed to be trappable, (how can KVM know if
HCR_EL2.TSC is secretly ignored?).
(/me looks at the condition code ESR_EL2.ISS bits for SMC trapped from aarch32)
... I'm not going to get this done this week, and as it no longer has any ties
to SDEI I'll drop this patch from the series and post it independently.
Thanks,
James
[0] https://lkml.org/lkml/2017/3/22/196
[1] https://patchwork.kernel.org/patch/9886029/
--
To unsubscribe from this list: send the line "unsubscribe devicetree" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply [flat|nested] 38+ messages in thread
* [PATCH v2 11/11] KVM: arm64: Allow user-space to claim guest SMC-CC ranges for SDEI
@ 2017-09-19 15:37 ` James Morse
0 siblings, 0 replies; 38+ messages in thread
From: James Morse @ 2017-09-19 15:37 UTC (permalink / raw)
To: linux-arm-kernel
Hi Christoffer,
On 17/09/17 15:43, Christoffer Dall wrote:
> On Tue, Aug 08, 2017 at 05:46:16PM +0100, James Morse wrote:
>> Instead of supporting SDEI in KVM, and providing a new API to
>> control and configure the in-kernel support, allow user-space to
>> request particular SMC-CC ranges from guest HVC calls to be handled
>> by user-space.
>>
>> This requires two KVM capabilities, KVM_CAP_ARM_SDEI_1_0 advertises
>> that KVM knows how to match SDEI SMC-CC calls from a guest. To pass these
>> calls to user-space requires this cap to be enabled using
>> KVM_CAP_ENABLE_CAP_VM.
>>
>> Calls are passed with exit_reason = KVM_EXIT_HYPERCALL, the kvm_run
>> structure has copies of the first 6 registers and the guest pstate.
>> diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
>> index e63a35fafef0..740288d6e894 100644
>> --- a/Documentation/virtual/kvm/api.txt
>> +++ b/Documentation/virtual/kvm/api.txt
>> @@ -1012,7 +1012,7 @@ documentation when it pops into existence).
>> 4.37 KVM_ENABLE_CAP
>>
>> Capability: KVM_CAP_ENABLE_CAP, KVM_CAP_ENABLE_CAP_VM
>> -Architectures: x86 (only KVM_CAP_ENABLE_CAP_VM),
>> +Architectures: x86, arm, arm64 (only KVM_CAP_ENABLE_CAP_VM),
>> mips (only KVM_CAP_ENABLE_CAP), ppc, s390
>> Type: vcpu ioctl, vm ioctl (with KVM_CAP_ENABLE_CAP_VM)
>> Parameters: struct kvm_enable_cap (in)
>> @@ -3540,10 +3540,16 @@ pending operations.
>> __u32 pad;
>> } hypercall;
>>
>> -Unused. This was once used for 'hypercall to userspace'. To implement
>> -such functionality, use KVM_EXIT_IO (x86) or KVM_EXIT_MMIO (all except s390).
>> +This was once used for 'hypercall to userspace'. To implement such
>> +functionality, use KVM_EXIT_IO (x86) or KVM_EXIT_MMIO (all except s390).
> I suggest you add 'for x86' to these two sentences, otherwise the
> following paragraph seems contradicting.
Does the second sentence apply to just x86? 'KVM_EXIT_MMIO (all except s390)'
I've changed this to:
> This was once used for 'hypercall to userspace' on x86. To implement such
> functionality, use KVM_EXIT_IO (x86) or KVM_EXIT_MMIO (all except s390).
>> Note KVM_EXIT_IO is significantly faster than KVM_EXIT_MMIO.
>>
>> +On arm64 this is used to complete guest hypercalls (HVC) in user space.
>> +e.g. Configuring SDEI or communicating with an emulated TEE.
>
> s/e.g./For example,/
>
>> +The required HVC ranges must first be enabled by KVM_CAP_ENABLE_CAP_VM.
>> +The 'args' array contains a copy of r0-r5 and 'longmode' contains a copy
>> +of the CPSR/PSTATE.
> I'm not entirely sure what this means by just looking at this text. Do
> you mean that some HVC (and SMC?) calls can still be handled by the
> kernel directly (PSCI), and some can be configured to be handled by the
user space? Where does it specify how these 'ranges' are defined?
I'm looking at this as just ARM's SMC-CC services, (which looks like too narrow
a view). I had expected the kernel to know which ranges it's willing to let
user-space drive, once they have a KVM_HVC_RANGE_ value userspace can try and
claim them.
This fits nicely with the kernel emulating PSCI by default, but looks wrong once
SMC and the immediates creep in too.
> What about the immediate field, could userspace not also need to know
> this?
Yes, I'd ignored that due to the narrow focus...
>> /* KVM_EXIT_TPR_ACCESS */
>> struct {
>> __u64 rip;
>> diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
>> index 7733492d9a35..77b8436e745e 100644
>> --- a/arch/arm64/include/asm/kvm_host.h
>> +++ b/arch/arm64/include/asm/kvm_host.h
>> @@ -71,8 +71,14 @@ struct kvm_arch {
>>
>> /* Interrupt controller */
>> struct vgic_dist vgic;
>> +
>> + /* SMC-CC/HVC ranges user space has requested */
>> + u32 hvc_passthrough_ranges;
>> };
>>
>> +/* SMC-CC/HVC ranges that can be passed to user space */
>> +#define KVM_HVC_RANGE_SDEI 1
> Oh, I see you want the kernel to know what constitutes a range for some
> functionality and set things up that way.
> What are your thoughts on letting userspace configure a range of from/to
> values in x0 for some value/range of the immediate field?
The only user we have today is PSCI. If we let userspace specify the x0 ranges
it may try and claim PSCI, or worse only part of the range the kernel uses for
its PSCI emulation. I'd like to avoid these corners.
The logic was the kernel has PSCI emulation so it already knows about particular
ranges, hence the grouping so that only whole ranges can be claimed...
.. but this doesn't fit with exposing the immediate values or SMC/HVC, both of
which have a bigger scope than SMC-CC.
Looking again at Marc's comments on this[0, 1], we could consider an 'all or
nothing' approach. If user-space wants the SDEI or any other range it has to
claim the lot, and emulate PSCI itself. This is more work for userspace in the
short term, but it's less work for userspace and the kernel in the long run.
(I assume Qemu already has a PSCI implementation for its TCG mode..)
>> +
>> #define KVM_NR_MEM_OBJS 40
>>
>> /*
>> diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
>> index 17d8a1677a0b..22eadf2cd82f 100644
>> --- a/arch/arm64/kvm/handle_exit.c
>> +++ b/arch/arm64/kvm/handle_exit.c
>> @@ -21,6 +21,7 @@
>>
>> #include <linux/kvm.h>
>> #include <linux/kvm_host.h>
>> +#include <linux/sdei.h>
>>
>> #include <asm/esr.h>
>> #include <asm/kvm_asm.h>
>> @@ -34,15 +35,40 @@
>>
>> typedef int (*exit_handle_fn)(struct kvm_vcpu *, struct kvm_run *);
>>
>> +#define HVC_PASSTHROUGH(kvm, range) (kvm->arch.hvc_passthrough_ranges & range)
> I think this should have parenthesis around (range) ?
>
> Are we not going to support this for SMC passthrough? (for nested virt
> for example).
Ah, I hadn't thought about that, (and I don't know much about how it works).
I assume user-space needs to know which of SMC/HVC the guest made, to route to
the appropriate guest{n}-hypervisor, and these probably need enabling
independently as SMC isn't guaranteed to be trappable, (how can KVM know if
HCR_EL2.TSC is secretly ignored?).
(/me looks at the condition code ESR_EL2.ISS bits for SMC trapped from aarch32)
... I'm not going to get this done this week, and as it no longer has any ties
to SDEI I'll drop this patch from the series and post it independently.
Thanks,
James
[0] https://lkml.org/lkml/2017/3/22/196
[1] https://patchwork.kernel.org/patch/9886029/
^ permalink raw reply [flat|nested] 38+ messages in thread