[PATCH v3 1/3] arm64: kvm: support kvmtool to detect RAS extension feature

linux-arm-kernel.lists.infradead.org archive mirror
 help / color / mirror / Atom feed

* [PATCH v3 1/3] arm64: kvm: support kvmtool to detect RAS extension feature
@ 2017-04-30  5:37 Dongjiu Geng
  2017-04-30  5:37 ` [PATCH v3 2/3] arm64: kvm: inject SError with virtual syndrome Dongjiu Geng
                   ` (3 more replies)
  0 siblings, 4 replies; 31+ messages in thread
From: Dongjiu Geng @ 2017-04-30  5:37 UTC (permalink / raw)
  To: linux-arm-kernel

Handle kvmtool's detection for RAS extension, because sometimes
the APP needs to know the CPU's capacity

Signed-off-by: Dongjiu Geng <gengdongjiu@huawei.com>
---
 arch/arm64/kvm/reset.c   | 11 +++++++++++
 include/uapi/linux/kvm.h |  1 +
 2 files changed, 12 insertions(+)

diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
index d9e9697..1004039 100644
--- a/arch/arm64/kvm/reset.c
+++ b/arch/arm64/kvm/reset.c
@@ -64,6 +64,14 @@ static bool cpu_has_32bit_el1(void)
 	return !!(pfr0 & 0x20);
 }
 
+static bool kvm_arm_support_ras_extension(void)
+{
+	u64 pfr0;
+
+	pfr0 = read_system_reg(SYS_ID_AA64PFR0_EL1);
+	return !!(pfr0 & 0x10000000);
+}
+
 /**
  * kvm_arch_dev_ioctl_check_extension
  *
@@ -87,6 +95,9 @@ int kvm_arch_dev_ioctl_check_extension(struct kvm *kvm, long ext)
 	case KVM_CAP_ARM_PMU_V3:
 		r = kvm_arm_support_pmu_v3();
 		break;
+	case KVM_CAP_ARM_RAS_EXTENSION:
+		r = kvm_arm_support_ras_extension();
+		break;
 	case KVM_CAP_SET_GUEST_DEBUG:
 	case KVM_CAP_VCPU_ATTRIBUTES:
 		r = 1;
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index f51d508..27fe556 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -883,6 +883,7 @@ struct kvm_ppc_resize_hpt {
 #define KVM_CAP_PPC_MMU_RADIX 134
 #define KVM_CAP_PPC_MMU_HASH_V3 135
 #define KVM_CAP_IMMEDIATE_EXIT 136
+#define KVM_CAP_ARM_RAS_EXTENSION 137
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
-- 
2.10.1

^ permalink raw reply related	[flat|nested] 31+ messages in thread

* [PATCH v3 2/3] arm64: kvm: inject SError with virtual syndrome
  2017-04-30  5:37 [PATCH v3 1/3] arm64: kvm: support kvmtool to detect RAS extension feature Dongjiu Geng
@ 2017-04-30  5:37 ` Dongjiu Geng
  2017-05-02  8:03   ` Christoffer Dall
  2017-05-02 15:37   ` James Morse
  2017-04-30  5:37 ` [PATCH v3 3/3] arm/arm64: signal SIBGUS and inject SEA Error Dongjiu Geng
                   ` (2 subsequent siblings)
  3 siblings, 2 replies; 31+ messages in thread
From: Dongjiu Geng @ 2017-04-30  5:37 UTC (permalink / raw)
  To: linux-arm-kernel

when SError happen, kvm notifies kvmtool to generate GHES table
to record the error, then kvmtools inject the SError with specified
virtual syndrome. when switch to guest, a virtual SError will happen with
this specified syndrome.

Signed-off-by: Dongjiu Geng <gengdongjiu@huawei.com>
---
 arch/arm64/include/asm/esr.h         |  2 ++
 arch/arm64/include/asm/kvm_emulate.h | 10 ++++++++++
 arch/arm64/include/asm/kvm_host.h    |  1 +
 arch/arm64/include/asm/sysreg.h      |  3 +++
 arch/arm64/kvm/handle_exit.c         | 25 +++++++++++++++++++------
 arch/arm64/kvm/hyp/switch.c          | 15 ++++++++++++++-
 include/uapi/linux/kvm.h             |  5 +++++
 7 files changed, 54 insertions(+), 7 deletions(-)

diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h
index 22f9c90..d009c99 100644
--- a/arch/arm64/include/asm/esr.h
+++ b/arch/arm64/include/asm/esr.h
@@ -127,6 +127,8 @@
 #define ESR_ELx_WFx_ISS_WFE	(UL(1) << 0)
 #define ESR_ELx_xVC_IMM_MASK	((1UL << 16) - 1)
 
+#define VSESR_ELx_IDS_ISS_MASK    ((1UL << 25) - 1)
+
 /* ESR value templates for specific events */
 
 /* BRK instruction trap from AArch64 state */
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index f5ea0ba..a3259a9 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -148,6 +148,16 @@ static inline u32 kvm_vcpu_get_hsr(const struct kvm_vcpu *vcpu)
 	return vcpu->arch.fault.esr_el2;
 }
 
+static inline u32 kvm_vcpu_get_vsesr(const struct kvm_vcpu *vcpu)
+{
+		return vcpu->arch.fault.vsesr_el2;
+}
+
+static inline void kvm_vcpu_set_vsesr(struct kvm_vcpu *vcpu, unsigned long val)
+{
+		vcpu->arch.fault.vsesr_el2 = val;
+}
+
 static inline int kvm_vcpu_get_condition(const struct kvm_vcpu *vcpu)
 {
 	u32 esr = kvm_vcpu_get_hsr(vcpu);
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index e7705e7..84ed239 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -86,6 +86,7 @@ struct kvm_vcpu_fault_info {
 	u32 esr_el2;		/* Hyp Syndrom Register */
 	u64 far_el2;		/* Hyp Fault Address Register */
 	u64 hpfar_el2;		/* Hyp IPA Fault Address Register */
+	u32 vsesr_el2;		/* Virtual SError Exception Syndrome Register */
 };
 
 /*
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index 32964c7..b6afb7a 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -125,6 +125,9 @@
 #define REG_PSTATE_PAN_IMM		sys_reg(0, 0, 4, 0, 4)
 #define REG_PSTATE_UAO_IMM		sys_reg(0, 0, 4, 0, 3)
 
+#define VSESR_EL2			sys_reg(3, 4, 5, 2, 3)
+
+
 #define SET_PSTATE_PAN(x) __emit_inst(0xd5000000 | REG_PSTATE_PAN_IMM |	\
 				      (!!x)<<8 | 0x1f)
 #define SET_PSTATE_UAO(x) __emit_inst(0xd5000000 | REG_PSTATE_UAO_IMM |	\
diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
index c89d83a..3d024a9 100644
--- a/arch/arm64/kvm/handle_exit.c
+++ b/arch/arm64/kvm/handle_exit.c
@@ -180,7 +180,11 @@ static exit_handle_fn kvm_get_exit_handler(struct kvm_vcpu *vcpu)
 
 static int kvm_handle_guest_sei(struct kvm_vcpu *vcpu, struct kvm_run *run)
 {
-	unsigned long fault_ipa = kvm_vcpu_get_fault_ipa(vcpu);
+	unsigned long hva, fault_ipa = kvm_vcpu_get_fault_ipa(vcpu);
+	struct kvm_memory_slot *memslot;
+	int hsr, ret = 1;
+	bool writable;
+	gfn_t gfn;
 
 	if (handle_guest_sei((unsigned long)fault_ipa,
 				kvm_vcpu_get_hsr(vcpu))) {
@@ -190,9 +194,20 @@ static int kvm_handle_guest_sei(struct kvm_vcpu *vcpu, struct kvm_run *run)
 				(unsigned long)kvm_vcpu_get_hsr(vcpu));
 
 		kvm_inject_vabt(vcpu);
+	} else {
+		hsr = kvm_vcpu_get_hsr(vcpu);
+
+		gfn = fault_ipa >> PAGE_SHIFT;
+		memslot = gfn_to_memslot(vcpu->kvm, gfn);
+		hva = gfn_to_hva_memslot_prot(memslot, gfn, &writable);
+
+		run->exit_reason = KVM_EXIT_INTR;
+		run->intr.syndrome_info = hsr;
+		run->intr.address = hva;
+		ret = 0;
 	}
 
-	return 0;
+	return ret;
 }
 
 /*
@@ -218,8 +233,7 @@ int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
 			*vcpu_pc(vcpu) -= adj;
 		}
 
-		kvm_handle_guest_sei(vcpu, run);
-		return 1;
+		return kvm_handle_guest_sei(vcpu, run);
 	}
 
 	exception_index = ARM_EXCEPTION_CODE(exception_index);
@@ -228,8 +242,7 @@ int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
 	case ARM_EXCEPTION_IRQ:
 		return 1;
 	case ARM_EXCEPTION_EL1_SERROR:
-		kvm_handle_guest_sei(vcpu, run);
-		return 1;
+		return kvm_handle_guest_sei(vcpu, run);
 	case ARM_EXCEPTION_TRAP:
 		/*
 		 * See ARM ARM B1.14.1: "Hyp traps on instructions
diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
index aede165..ded6211 100644
--- a/arch/arm64/kvm/hyp/switch.c
+++ b/arch/arm64/kvm/hyp/switch.c
@@ -86,6 +86,13 @@ static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu)
 		isb();
 	}
 	write_sysreg(val, hcr_el2);
+    /* If virtual System Error or Asynchronous Abort is pending. set
+     * the virtual exception syndrome information
+     */
+	if (cpus_have_cap(ARM64_HAS_RAS_EXTN) &&
+			(vcpu->arch.hcr_el2 & HCR_VSE))
+		write_sysreg_s(vcpu->arch.fault.vsesr_el2, VSESR_EL2);
+
 	/* Trap on AArch32 cp15 c15 accesses (EL1 or EL0) */
 	write_sysreg(1 << 15, hstr_el2);
 	/*
@@ -139,9 +146,15 @@ static void __hyp_text __deactivate_traps(struct kvm_vcpu *vcpu)
 	 * the crucial bit is "On taking a vSError interrupt,
 	 * HCR_EL2.VSE is cleared to 0."
 	 */
-	if (vcpu->arch.hcr_el2 & HCR_VSE)
+	if (vcpu->arch.hcr_el2 & HCR_VSE) {
 		vcpu->arch.hcr_el2 = read_sysreg(hcr_el2);
 
+		if (cpus_have_cap(ARM64_HAS_RAS_EXTN)) {
+			/* set vsesr_el2[24:0] with esr_el2[24:0] */
+			kvm_vcpu_set_vsesr(vcpu, read_sysreg_el2(esr)
+					& VSESR_ELx_IDS_ISS_MASK);
+		}
+	}
 	__deactivate_traps_arch()();
 	write_sysreg(0, hstr_el2);
 	write_sysreg(0, pmuserenr_el0);
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 27fe556..bb02909 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -360,6 +360,11 @@ struct kvm_run {
 		struct {
 			__u8 vector;
 		} eoi;
+		/* KVM_EXIT_INTR */
+		struct {
+			__u32 syndrome_info;
+			__u64 address;
+		} intr;
 		/* KVM_EXIT_HYPERV */
 		struct kvm_hyperv_exit hyperv;
 		/* Fix the size of the union. */
-- 
2.10.1

^ permalink raw reply related	[flat|nested] 31+ messages in thread

* [PATCH v3 2/3] arm64: kvm: inject SError with virtual syndrome
  2017-04-30  5:37 ` [PATCH v3 2/3] arm64: kvm: inject SError with virtual syndrome Dongjiu Geng
@ 2017-05-02  8:03   ` Christoffer Dall
  2017-05-02 12:20     ` gengdongjiu
  2017-05-02 15:37   ` James Morse
  1 sibling, 1 reply; 31+ messages in thread
From: Christoffer Dall @ 2017-05-02  8:03 UTC (permalink / raw)
  To: linux-arm-kernel

On Sun, Apr 30, 2017 at 01:37:56PM +0800, Dongjiu Geng wrote:
> when SError happen, kvm notifies kvmtool to generate GHES table
> to record the error, then kvmtools inject the SError with specified

again, is this really specific to kvmtool?  Pleae try to explain this
mechanism in generic terms.

> virtual syndrome. when switch to guest, a virtual SError will happen with
> this specified syndrome.
> 
> Signed-off-by: Dongjiu Geng <gengdongjiu@huawei.com>
> ---
>  arch/arm64/include/asm/esr.h         |  2 ++
>  arch/arm64/include/asm/kvm_emulate.h | 10 ++++++++++
>  arch/arm64/include/asm/kvm_host.h    |  1 +
>  arch/arm64/include/asm/sysreg.h      |  3 +++
>  arch/arm64/kvm/handle_exit.c         | 25 +++++++++++++++++++------
>  arch/arm64/kvm/hyp/switch.c          | 15 ++++++++++++++-
>  include/uapi/linux/kvm.h             |  5 +++++
>  7 files changed, 54 insertions(+), 7 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h
> index 22f9c90..d009c99 100644
> --- a/arch/arm64/include/asm/esr.h
> +++ b/arch/arm64/include/asm/esr.h
> @@ -127,6 +127,8 @@
>  #define ESR_ELx_WFx_ISS_WFE	(UL(1) << 0)
>  #define ESR_ELx_xVC_IMM_MASK	((1UL << 16) - 1)
>  
> +#define VSESR_ELx_IDS_ISS_MASK    ((1UL << 25) - 1)
> +
>  /* ESR value templates for specific events */
>  
>  /* BRK instruction trap from AArch64 state */
> diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
> index f5ea0ba..a3259a9 100644
> --- a/arch/arm64/include/asm/kvm_emulate.h
> +++ b/arch/arm64/include/asm/kvm_emulate.h
> @@ -148,6 +148,16 @@ static inline u32 kvm_vcpu_get_hsr(const struct kvm_vcpu *vcpu)
>  	return vcpu->arch.fault.esr_el2;
>  }
>  
> +static inline u32 kvm_vcpu_get_vsesr(const struct kvm_vcpu *vcpu)
> +{
> +		return vcpu->arch.fault.vsesr_el2;
> +}
> +
> +static inline void kvm_vcpu_set_vsesr(struct kvm_vcpu *vcpu, unsigned long val)
> +{
> +		vcpu->arch.fault.vsesr_el2 = val;
> +}
> +
>  static inline int kvm_vcpu_get_condition(const struct kvm_vcpu *vcpu)
>  {
>  	u32 esr = kvm_vcpu_get_hsr(vcpu);
> diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
> index e7705e7..84ed239 100644
> --- a/arch/arm64/include/asm/kvm_host.h
> +++ b/arch/arm64/include/asm/kvm_host.h
> @@ -86,6 +86,7 @@ struct kvm_vcpu_fault_info {
>  	u32 esr_el2;		/* Hyp Syndrom Register */
>  	u64 far_el2;		/* Hyp Fault Address Register */
>  	u64 hpfar_el2;		/* Hyp IPA Fault Address Register */
> +	u32 vsesr_el2;		/* Virtual SError Exception Syndrome Register */
>  };
>  
>  /*
> diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
> index 32964c7..b6afb7a 100644
> --- a/arch/arm64/include/asm/sysreg.h
> +++ b/arch/arm64/include/asm/sysreg.h
> @@ -125,6 +125,9 @@
>  #define REG_PSTATE_PAN_IMM		sys_reg(0, 0, 4, 0, 4)
>  #define REG_PSTATE_UAO_IMM		sys_reg(0, 0, 4, 0, 3)
>  
> +#define VSESR_EL2			sys_reg(3, 4, 5, 2, 3)
> +
> +
>  #define SET_PSTATE_PAN(x) __emit_inst(0xd5000000 | REG_PSTATE_PAN_IMM |	\
>  				      (!!x)<<8 | 0x1f)
>  #define SET_PSTATE_UAO(x) __emit_inst(0xd5000000 | REG_PSTATE_UAO_IMM |	\
> diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
> index c89d83a..3d024a9 100644
> --- a/arch/arm64/kvm/handle_exit.c
> +++ b/arch/arm64/kvm/handle_exit.c
> @@ -180,7 +180,11 @@ static exit_handle_fn kvm_get_exit_handler(struct kvm_vcpu *vcpu)
>  
>  static int kvm_handle_guest_sei(struct kvm_vcpu *vcpu, struct kvm_run *run)
>  {
> -	unsigned long fault_ipa = kvm_vcpu_get_fault_ipa(vcpu);
> +	unsigned long hva, fault_ipa = kvm_vcpu_get_fault_ipa(vcpu);
> +	struct kvm_memory_slot *memslot;
> +	int hsr, ret = 1;
> +	bool writable;
> +	gfn_t gfn;
>  
>  	if (handle_guest_sei((unsigned long)fault_ipa,
>  				kvm_vcpu_get_hsr(vcpu))) {
> @@ -190,9 +194,20 @@ static int kvm_handle_guest_sei(struct kvm_vcpu *vcpu, struct kvm_run *run)
>  				(unsigned long)kvm_vcpu_get_hsr(vcpu));
>  
>  		kvm_inject_vabt(vcpu);
> +	} else {
> +		hsr = kvm_vcpu_get_hsr(vcpu);
> +
> +		gfn = fault_ipa >> PAGE_SHIFT;
> +		memslot = gfn_to_memslot(vcpu->kvm, gfn);
> +		hva = gfn_to_hva_memslot_prot(memslot, gfn, &writable);
> +
> +		run->exit_reason = KVM_EXIT_INTR;
> +		run->intr.syndrome_info = hsr;
> +		run->intr.address = hva;
> +		ret = 0;
>  	}
>  
> -	return 0;
> +	return ret;
>  }
>  
>  /*
> @@ -218,8 +233,7 @@ int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
>  			*vcpu_pc(vcpu) -= adj;
>  		}
>  
> -		kvm_handle_guest_sei(vcpu, run);
> -		return 1;
> +		return kvm_handle_guest_sei(vcpu, run);
>  	}
>  
>  	exception_index = ARM_EXCEPTION_CODE(exception_index);
> @@ -228,8 +242,7 @@ int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
>  	case ARM_EXCEPTION_IRQ:
>  		return 1;
>  	case ARM_EXCEPTION_EL1_SERROR:
> -		kvm_handle_guest_sei(vcpu, run);
> -		return 1;
> +		return kvm_handle_guest_sei(vcpu, run);
>  	case ARM_EXCEPTION_TRAP:
>  		/*
>  		 * See ARM ARM B1.14.1: "Hyp traps on instructions
> diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
> index aede165..ded6211 100644
> --- a/arch/arm64/kvm/hyp/switch.c
> +++ b/arch/arm64/kvm/hyp/switch.c
> @@ -86,6 +86,13 @@ static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu)
>  		isb();
>  	}
>  	write_sysreg(val, hcr_el2);
> +    /* If virtual System Error or Asynchronous Abort is pending. set

nit: I think you want a comma after pending, not a dot.

> +     * the virtual exception syndrome information
> +     */

nit: commenting style

> +	if (cpus_have_cap(ARM64_HAS_RAS_EXTN) &&
> +			(vcpu->arch.hcr_el2 & HCR_VSE))
> +		write_sysreg_s(vcpu->arch.fault.vsesr_el2, VSESR_EL2);
> +
>  	/* Trap on AArch32 cp15 c15 accesses (EL1 or EL0) */
>  	write_sysreg(1 << 15, hstr_el2);
>  	/*
> @@ -139,9 +146,15 @@ static void __hyp_text __deactivate_traps(struct kvm_vcpu *vcpu)
>  	 * the crucial bit is "On taking a vSError interrupt,
>  	 * HCR_EL2.VSE is cleared to 0."
>  	 */
> -	if (vcpu->arch.hcr_el2 & HCR_VSE)
> +	if (vcpu->arch.hcr_el2 & HCR_VSE) {
>  		vcpu->arch.hcr_el2 = read_sysreg(hcr_el2);
>  
> +		if (cpus_have_cap(ARM64_HAS_RAS_EXTN)) {
> +			/* set vsesr_el2[24:0] with esr_el2[24:0] */
> +			kvm_vcpu_set_vsesr(vcpu, read_sysreg_el2(esr)
> +					& VSESR_ELx_IDS_ISS_MASK);
> +		}
> +	}
>  	__deactivate_traps_arch()();
>  	write_sysreg(0, hstr_el2);
>  	write_sysreg(0, pmuserenr_el0);
> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> index 27fe556..bb02909 100644
> --- a/include/uapi/linux/kvm.h
> +++ b/include/uapi/linux/kvm.h
> @@ -360,6 +360,11 @@ struct kvm_run {
>  		struct {
>  			__u8 vector;
>  		} eoi;
> +		/* KVM_EXIT_INTR */
> +		struct {
> +			__u32 syndrome_info;
> +			__u64 address;
> +		} intr;

definitely, not.  KVM_EXIT_INTR is a generic exit code to tell userspace
that we exited because we needed to deliver a signal or something else
related to an asynchronous event.  This implies that the syndrome_info
etc. always has valid values on all architectures when exiting with
KVM_EXIT_INTR.

Either document the behavior as the syndrome_info has side-channel
information on every exit, or on some KVM_EXIT_INTR exits, as we explain
in the KVM_CAP_ARM_USER_IRQ ABI that was just added, or dedicate an
access code.

>  		/* KVM_EXIT_HYPERV */
>  		struct kvm_hyperv_exit hyperv;
>  		/* Fix the size of the union. */
> -- 
> 2.10.1
> 

I'll look at the details of such patches once the ABI is clear and
well-documented.

Thanks,
-Christoffer

^ permalink raw reply	[flat|nested] 31+ messages in thread

* [PATCH v3 2/3] arm64: kvm: inject SError with virtual syndrome
  2017-05-02  8:03   ` Christoffer Dall
@ 2017-05-02 12:20     ` gengdongjiu
  0 siblings, 0 replies; 31+ messages in thread
From: gengdongjiu @ 2017-05-02 12:20 UTC (permalink / raw)
  To: linux-arm-kernel

Hello Christoffer.


On 2017/5/2 16:03, Christoffer Dall wrote:
> On Sun, Apr 30, 2017 at 01:37:56PM +0800, Dongjiu Geng wrote:
>> when SError happen, kvm notifies kvmtool to generate GHES table
>> to record the error, then kvmtools inject the SError with specified
> 
> again, is this really specific to kvmtool?  Pleae try to explain this
> mechanism in generic terms.
  It is both for qemu and other userspace application. I will correct it.

> 
>> virtual syndrome. when switch to guest, a virtual SError will happen with
>> this specified syndrome.
>>
>> Signed-off-by: Dongjiu Geng <gengdongjiu@huawei.com>
>> ---
>>  arch/arm64/include/asm/esr.h         |  2 ++
>>  arch/arm64/include/asm/kvm_emulate.h | 10 ++++++++++
>>  arch/arm64/include/asm/kvm_host.h    |  1 +
>>  arch/arm64/include/asm/sysreg.h      |  3 +++
>>  arch/arm64/kvm/handle_exit.c         | 25 +++++++++++++++++++------
>>  arch/arm64/kvm/hyp/switch.c          | 15 ++++++++++++++-
>>  include/uapi/linux/kvm.h             |  5 +++++
>>  7 files changed, 54 insertions(+), 7 deletions(-)
>>
>> diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h
>> index 22f9c90..d009c99 100644
>> --- a/arch/arm64/include/asm/esr.h
>> +++ b/arch/arm64/include/asm/esr.h
>> @@ -127,6 +127,8 @@
>>  #define ESR_ELx_WFx_ISS_WFE	(UL(1) << 0)
>>  #define ESR_ELx_xVC_IMM_MASK	((1UL << 16) - 1)
>>  
>> +#define VSESR_ELx_IDS_ISS_MASK    ((1UL << 25) - 1)
>> +
>>  /* ESR value templates for specific events */
>>  
>>  /* BRK instruction trap from AArch64 state */
>> diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
>> index f5ea0ba..a3259a9 100644
>> --- a/arch/arm64/include/asm/kvm_emulate.h
>> +++ b/arch/arm64/include/asm/kvm_emulate.h
>> @@ -148,6 +148,16 @@ static inline u32 kvm_vcpu_get_hsr(const struct kvm_vcpu *vcpu)
>>  	return vcpu->arch.fault.esr_el2;
>>  }
>>  
>> +static inline u32 kvm_vcpu_get_vsesr(const struct kvm_vcpu *vcpu)
>> +{
>> +		return vcpu->arch.fault.vsesr_el2;
>> +}
>> +
>> +static inline void kvm_vcpu_set_vsesr(struct kvm_vcpu *vcpu, unsigned long val)
>> +{
>> +		vcpu->arch.fault.vsesr_el2 = val;
>> +}
>> +
>>  static inline int kvm_vcpu_get_condition(const struct kvm_vcpu *vcpu)
>>  {
>>  	u32 esr = kvm_vcpu_get_hsr(vcpu);
>> diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
>> index e7705e7..84ed239 100644
>> --- a/arch/arm64/include/asm/kvm_host.h
>> +++ b/arch/arm64/include/asm/kvm_host.h
>> @@ -86,6 +86,7 @@ struct kvm_vcpu_fault_info {
>>  	u32 esr_el2;		/* Hyp Syndrom Register */
>>  	u64 far_el2;		/* Hyp Fault Address Register */
>>  	u64 hpfar_el2;		/* Hyp IPA Fault Address Register */
>> +	u32 vsesr_el2;		/* Virtual SError Exception Syndrome Register */
>>  };
>>  
>>  /*
>> diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
>> index 32964c7..b6afb7a 100644
>> --- a/arch/arm64/include/asm/sysreg.h
>> +++ b/arch/arm64/include/asm/sysreg.h
>> @@ -125,6 +125,9 @@
>>  #define REG_PSTATE_PAN_IMM		sys_reg(0, 0, 4, 0, 4)
>>  #define REG_PSTATE_UAO_IMM		sys_reg(0, 0, 4, 0, 3)
>>  
>> +#define VSESR_EL2			sys_reg(3, 4, 5, 2, 3)
>> +
>> +
>>  #define SET_PSTATE_PAN(x) __emit_inst(0xd5000000 | REG_PSTATE_PAN_IMM |	\
>>  				      (!!x)<<8 | 0x1f)
>>  #define SET_PSTATE_UAO(x) __emit_inst(0xd5000000 | REG_PSTATE_UAO_IMM |	\
>> diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
>> index c89d83a..3d024a9 100644
>> --- a/arch/arm64/kvm/handle_exit.c
>> +++ b/arch/arm64/kvm/handle_exit.c
>> @@ -180,7 +180,11 @@ static exit_handle_fn kvm_get_exit_handler(struct kvm_vcpu *vcpu)
>>  
>>  static int kvm_handle_guest_sei(struct kvm_vcpu *vcpu, struct kvm_run *run)
>>  {
>> -	unsigned long fault_ipa = kvm_vcpu_get_fault_ipa(vcpu);
>> +	unsigned long hva, fault_ipa = kvm_vcpu_get_fault_ipa(vcpu);
>> +	struct kvm_memory_slot *memslot;
>> +	int hsr, ret = 1;
>> +	bool writable;
>> +	gfn_t gfn;
>>  
>>  	if (handle_guest_sei((unsigned long)fault_ipa,
>>  				kvm_vcpu_get_hsr(vcpu))) {
>> @@ -190,9 +194,20 @@ static int kvm_handle_guest_sei(struct kvm_vcpu *vcpu, struct kvm_run *run)
>>  				(unsigned long)kvm_vcpu_get_hsr(vcpu));
>>  
>>  		kvm_inject_vabt(vcpu);
>> +	} else {
>> +		hsr = kvm_vcpu_get_hsr(vcpu);
>> +
>> +		gfn = fault_ipa >> PAGE_SHIFT;
>> +		memslot = gfn_to_memslot(vcpu->kvm, gfn);
>> +		hva = gfn_to_hva_memslot_prot(memslot, gfn, &writable);
>> +
>> +		run->exit_reason = KVM_EXIT_INTR;
>> +		run->intr.syndrome_info = hsr;
>> +		run->intr.address = hva;
>> +		ret = 0;
>>  	}
>>  
>> -	return 0;
>> +	return ret;
>>  }
>>  
>>  /*
>> @@ -218,8 +233,7 @@ int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
>>  			*vcpu_pc(vcpu) -= adj;
>>  		}
>>  
>> -		kvm_handle_guest_sei(vcpu, run);
>> -		return 1;
>> +		return kvm_handle_guest_sei(vcpu, run);
>>  	}
>>  
>>  	exception_index = ARM_EXCEPTION_CODE(exception_index);
>> @@ -228,8 +242,7 @@ int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
>>  	case ARM_EXCEPTION_IRQ:
>>  		return 1;
>>  	case ARM_EXCEPTION_EL1_SERROR:
>> -		kvm_handle_guest_sei(vcpu, run);
>> -		return 1;
>> +		return kvm_handle_guest_sei(vcpu, run);
>>  	case ARM_EXCEPTION_TRAP:
>>  		/*
>>  		 * See ARM ARM B1.14.1: "Hyp traps on instructions
>> diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
>> index aede165..ded6211 100644
>> --- a/arch/arm64/kvm/hyp/switch.c
>> +++ b/arch/arm64/kvm/hyp/switch.c
>> @@ -86,6 +86,13 @@ static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu)
>>  		isb();
>>  	}
>>  	write_sysreg(val, hcr_el2);
>> +    /* If virtual System Error or Asynchronous Abort is pending. set
> 
> nit: I think you want a comma after pending, not a dot.
> 
>> +     * the virtual exception syndrome information
>> +     */
> 
> nit: commenting style
> 
>> +	if (cpus_have_cap(ARM64_HAS_RAS_EXTN) &&
>> +			(vcpu->arch.hcr_el2 & HCR_VSE))
>> +		write_sysreg_s(vcpu->arch.fault.vsesr_el2, VSESR_EL2);
>> +
>>  	/* Trap on AArch32 cp15 c15 accesses (EL1 or EL0) */
>>  	write_sysreg(1 << 15, hstr_el2);
>>  	/*
>> @@ -139,9 +146,15 @@ static void __hyp_text __deactivate_traps(struct kvm_vcpu *vcpu)
>>  	 * the crucial bit is "On taking a vSError interrupt,
>>  	 * HCR_EL2.VSE is cleared to 0."
>>  	 */
>> -	if (vcpu->arch.hcr_el2 & HCR_VSE)
>> +	if (vcpu->arch.hcr_el2 & HCR_VSE) {
>>  		vcpu->arch.hcr_el2 = read_sysreg(hcr_el2);
>>  
>> +		if (cpus_have_cap(ARM64_HAS_RAS_EXTN)) {
>> +			/* set vsesr_el2[24:0] with esr_el2[24:0] */
>> +			kvm_vcpu_set_vsesr(vcpu, read_sysreg_el2(esr)
>> +					& VSESR_ELx_IDS_ISS_MASK);
>> +		}
>> +	}
>>  	__deactivate_traps_arch()();
>>  	write_sysreg(0, hstr_el2);
>>  	write_sysreg(0, pmuserenr_el0);
>> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
>> index 27fe556..bb02909 100644
>> --- a/include/uapi/linux/kvm.h
>> +++ b/include/uapi/linux/kvm.h
>> @@ -360,6 +360,11 @@ struct kvm_run {
>>  		struct {
>>  			__u8 vector;
>>  		} eoi;
>> +		/* KVM_EXIT_INTR */
>> +		struct {
>> +			__u32 syndrome_info;
>> +			__u64 address;
>> +		} intr;
> 
> definitely, not.  KVM_EXIT_INTR is a generic exit code to tell userspace
> that we exited because we needed to deliver a signal or something else
> related to an asynchronous event.  This implies that the syndrome_info
> etc. always has valid values on all architectures when exiting with
> KVM_EXIT_INTR.
> 
> Either document the behavior as the syndrome_info has side-channel
> information on every exit, or on some KVM_EXIT_INTR exits, as we explain
> in the KVM_CAP_ARM_USER_IRQ ABI that was just added, or dedicate an
> access code.
OK.

> 
>>  		/* KVM_EXIT_HYPERV */
>>  		struct kvm_hyperv_exit hyperv;
>>  		/* Fix the size of the union. */
>> -- 
>> 2.10.1
>>
> 
> I'll look at the details of such patches once the ABI is clear and
> well-documented.
  OK.

> 
> Thanks,
> -Christoffer
> 
> .
> 

^ permalink raw reply	[flat|nested] 31+ messages in thread

* [PATCH v3 2/3] arm64: kvm: inject SError with virtual syndrome
  2017-04-30  5:37 ` [PATCH v3 2/3] arm64: kvm: inject SError with virtual syndrome Dongjiu Geng
  2017-05-02  8:03   ` Christoffer Dall
@ 2017-05-02 15:37   ` James Morse
  2017-05-05 13:19     ` gengdongjiu
  1 sibling, 1 reply; 31+ messages in thread
From: James Morse @ 2017-05-02 15:37 UTC (permalink / raw)
  To: linux-arm-kernel

Hi Dongjiu Geng,

On 30/04/17 06:37, Dongjiu Geng wrote:
> when SError happen, kvm notifies kvmtool to generate GHES table
> to record the error, then kvmtools inject the SError with specified
> virtual syndrome. when switch to guest, a virtual SError will happen with
> this specified syndrome.

GHES records in the HEST (T)able have to be generated before the OS starts as
these are read at boot. Did you mean generate CPER records?

It looks like this is based on the earlier SEI series, please work together and
post a combined series when there are changes. (It also good to summarise the
changes in the cover letter.)

This patch is modifying the world-switch to save/restore VSESR. You should
explain that VSESR is the Virtual SError Syndrome, it becomes the ESR value when
HCR_EL2.VSE injects an SError. This register was added by the RAS Extensions and
needs patching in or guarding.

> diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
> index aede165..ded6211 100644
> --- a/arch/arm64/kvm/hyp/switch.c
> +++ b/arch/arm64/kvm/hyp/switch.c
> @@ -86,6 +86,13 @@ static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu)
>  		isb();
>  	}
>  	write_sysreg(val, hcr_el2);
> +    /* If virtual System Error or Asynchronous Abort is pending. set
> +     * the virtual exception syndrome information
> +     */
> +	if (cpus_have_cap(ARM64_HAS_RAS_EXTN) &&

Is cpus_have_cap() safe to use at EL2?
This would be the first user, and it accesses cpu_hwcaps. You probably want
something like the static_branch_unlikely()s in the vgic code elsewhere in this
file.

> +			(vcpu->arch.hcr_el2 & HCR_VSE))
> +		write_sysreg_s(vcpu->arch.fault.vsesr_el2, VSESR_EL2);
> +

I think this would be clearer if you took this out to a helper method called
something like restore_vsesr() and did the if(cap|VSE) stuff there.

(Nit: comment style)

>  	/* Trap on AArch32 cp15 c15 accesses (EL1 or EL0) */
>  	write_sysreg(1 << 15, hstr_el2);
>  	/*
> @@ -139,9 +146,15 @@ static void __hyp_text __deactivate_traps(struct kvm_vcpu *vcpu)
>  	 * the crucial bit is "On taking a vSError interrupt,
>  	 * HCR_EL2.VSE is cleared to 0."
>  	 */
> -	if (vcpu->arch.hcr_el2 & HCR_VSE)
> +	if (vcpu->arch.hcr_el2 & HCR_VSE) {
>  		vcpu->arch.hcr_el2 = read_sysreg(hcr_el2);
>  
> +		if (cpus_have_cap(ARM64_HAS_RAS_EXTN)) {
> +			/* set vsesr_el2[24:0] with esr_el2[24:0] */
> +			kvm_vcpu_set_vsesr(vcpu, read_sysreg_el2(esr)
> +					& VSESR_ELx_IDS_ISS_MASK);

There is no need for KVM to save the VSESR. It is copied to ESR_EL1 when
HCR_EL2.VSE delivers the SError, after this we don't care what the register
value is. When we switch to a guest we should set the value from KVM whenever
the VSE bit is set. We should never read back the value in hardware.

Why read ESR_EL2? This will give you a completely unrelated value. If EL2 takes
an IRQ or a page fault between pending the SError and delivering it, we
overwrite the value set by KVM or user-space with a stray EL2 value.

... I think you expect an SError to arrive at EL2 and have its ESR recorded in
vcpu->arch.fault.vsesr_el2. Some time later KVM decides to inject an SError into
the guest, and this ESR is reused...

We shouldn't do this. Qemu/kvmtool may want to inject a virtual-SError that
never started as a physical-SError. Qemu/kvmtool may choose to notify the guest
of RAS events via another mechanism, or not at all.

KVM should not give the guest an ESR value of its choice. For SError the ESR
describes whether the error is corrected, correctable or fatal. Qemu/kvmtool
must choose this.

I think we need an API that allows Qemu/kvmtool to inject SError into a guest,
but that isn't quite what you have here.

The VSESR value should always come from user space. The only exception are
SErrors that we know weren't due to RAS: for these we should set the VSESR to
zero to keep the existing behaviour.

Thanks,

James

^ permalink raw reply	[flat|nested] 31+ messages in thread

* [PATCH v3 2/3] arm64: kvm: inject SError with virtual syndrome
  2017-05-02 15:37   ` James Morse
@ 2017-05-05 13:19     ` gengdongjiu
  2017-05-12 17:24       ` James Morse
  0 siblings, 1 reply; 31+ messages in thread
From: gengdongjiu @ 2017-05-05 13:19 UTC (permalink / raw)
  To: linux-arm-kernel

Hi james,

   Thanks for your detailed suggestion.

On 2017/5/2 23:37, James Morse wrote:
> Hi Dongjiu Geng,
> 
> On 30/04/17 06:37, Dongjiu Geng wrote:
>> when SError happen, kvm notifies kvmtool to generate GHES table
>> to record the error, then kvmtools inject the SError with specified
>> virtual syndrome. when switch to guest, a virtual SError will happen with
>> this specified syndrome.
> 
> GHES records in the HEST (T)able have to be generated before the OS starts as
> these are read at boot. Did you mean generate CPER records?
 you are quite right that should generate CPER records.


> 
> 
> It looks like this is based on the earlier SEI series, please work together and
> post a combined series when there are changes. (It also good to summarise the
> changes in the cover letter.)
Ok.

> 
> This patch is modifying the world-switch to save/restore VSESR. You should
> explain that VSESR is the Virtual SError Syndrome, it becomes the ESR value when
> HCR_EL2.VSE injects an SError. This register was added by the RAS Extensions and
> needs patching in or guarding.
yes, you are right.

> 
> 
>> diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
>> index aede165..ded6211 100644
>> --- a/arch/arm64/kvm/hyp/switch.c
>> +++ b/arch/arm64/kvm/hyp/switch.c
>> @@ -86,6 +86,13 @@ static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu)
>>  		isb();
>>  	}
>>  	write_sysreg(val, hcr_el2);
>> +    /* If virtual System Error or Asynchronous Abort is pending. set
>> +     * the virtual exception syndrome information
>> +     */
>> +	if (cpus_have_cap(ARM64_HAS_RAS_EXTN) &&
> 
> Is cpus_have_cap() safe to use at EL2?
> This would be the first user, and it accesses cpu_hwcaps. You probably want
> something like the static_branch_unlikely()s in the vgic code elsewhere in this
> file.
> 
> 
>> +			(vcpu->arch.hcr_el2 & HCR_VSE))
>> +		write_sysreg_s(vcpu->arch.fault.vsesr_el2, VSESR_EL2);
>> +
> 
> I think this would be clearer if you took this out to a helper method called
> something like restore_vsesr() and did the if(cap|VSE) stuff there.
  good suggestion.

> 
> (Nit: comment style)
 OK.

> 
> 
>>  	/* Trap on AArch32 cp15 c15 accesses (EL1 or EL0) */
>>  	write_sysreg(1 << 15, hstr_el2);
>>  	/*
>> @@ -139,9 +146,15 @@ static void __hyp_text __deactivate_traps(struct kvm_vcpu *vcpu)
>>  	 * the crucial bit is "On taking a vSError interrupt,
>>  	 * HCR_EL2.VSE is cleared to 0."
>>  	 */
>> -	if (vcpu->arch.hcr_el2 & HCR_VSE)
>> +	if (vcpu->arch.hcr_el2 & HCR_VSE) {
>>  		vcpu->arch.hcr_el2 = read_sysreg(hcr_el2);
>>  
>> +		if (cpus_have_cap(ARM64_HAS_RAS_EXTN)) {
>> +			/* set vsesr_el2[24:0] with esr_el2[24:0] */
>> +			kvm_vcpu_set_vsesr(vcpu, read_sysreg_el2(esr)
>> +					& VSESR_ELx_IDS_ISS_MASK);
> 
> There is no need for KVM to save the VSESR. It is copied to ESR_EL1 when
> HCR_EL2.VSE delivers the SError, after this we don't care what the register
> value is. When we switch to a guest we should set the value from KVM whenever
> the VSE bit is set. We should never read back the value in hardware.
  I think you are right. Thanks for your points out.

> 
> Why read ESR_EL2? This will give you a completely unrelated value. If EL2 takes
> an IRQ or a page fault between pending the SError and delivering it, we
> overwrite the value set by KVM or user-space with a stray EL2 value.
> 
> 
> ... I think you expect an SError to arrive at EL2 and have its ESR recorded in
> vcpu->arch.fault.vsesr_el2. Some time later KVM decides to inject an SError into
> the guest, and this ESR is reused...
> 
> We shouldn't do this. Qemu/kvmtool may want to inject a virtual-SError that
> never started as a physical-SError. Qemu/kvmtool may choose to notify the guest
> of RAS events via another mechanism, or not at all.
> 
> KVM should not give the guest an ESR value of its choice. For SError the ESR
> describes whether the error is corrected, correctable or fatal. Qemu/kvmtool
> must choose this.

Below is my previous solution:
For the SError, CPU will firstly trap to EL3 firmware and records the syndrome to ESR_EL3.
Before jumping to El2 hypervisors, it will copy the esr_el3 to esr_el2.
so in order to pass this syndrome to vsesr_el2, using the esr_el2 value to assign it.


If Qemu/kvmtool chooses the ESR value and ESR only describes whether the error is corrected/correctable/fatal,
whether the information is not enough for the guest?


>
> I think we need an API that allows Qemu/kvmtool to inject SError into a guest,
> but that isn't quite what you have here.

KVM provides APIs to inject the SError, Qemu/kvmtool call the API though IOCTL, may be OK?


> 
> The VSESR value should always come from user space. The only exception are
> SErrors that we know weren't due to RAS: for these we should set the VSESR to
> zero to keep the existing behaviour.
> 
> 
> Thanks,
> 
> James
> .
> 

^ permalink raw reply	[flat|nested] 31+ messages in thread

* [PATCH v3 2/3] arm64: kvm: inject SError with virtual syndrome
  2017-05-05 13:19     ` gengdongjiu
@ 2017-05-12 17:24       ` James Morse
  2017-05-21  9:08         ` gengdongjiu
  0 siblings, 1 reply; 31+ messages in thread
From: James Morse @ 2017-05-12 17:24 UTC (permalink / raw)
  To: linux-arm-kernel

Hi gengdongjiu,

On 05/05/17 14:19, gengdongjiu wrote:
> On 2017/5/2 23:37, James Morse wrote:
> > ... I think you expect an SError to arrive at EL2 and have its ESR recorded in
> > vcpu->arch.fault.vsesr_el2. Some time later KVM decides to inject an SError into
> > the guest, and this ESR is reused...
> > 
> > We shouldn't do this. Qemu/kvmtool may want to inject a virtual-SError that
> > never started as a physical-SError. Qemu/kvmtool may choose to notify the guest
> > of RAS events via another mechanism, or not at all.
> > 
> > KVM should not give the guest an ESR value of its choice. For SError the ESR
> > describes whether the error is corrected, correctable or fatal. Qemu/kvmtool
> > must choose this.

> Below is my previous solution:
> For the SError, CPU will firstly trap to EL3 firmware and records the syndrome to ESR_EL3.
> Before jumping to El2 hypervisors, it will copy the esr_el3 to esr_el2.

(Copying the ESR value won't always be the right thing to do.)


> so in order to pass this syndrome to vsesr_el2, using the esr_el2 value to assign it.

> If Qemu/kvmtool chooses the ESR value and ESR only describes whether the error is corrected/correctable/fatal,
> whether the information is not enough for the guest?

So the API should specify which of these three severities to use? I think this
is too specific. The API should be useful for anything the VSE/VSESR hardware
can do.

VSESR_EL2 is described in the RAS spec: 4.4.12 [0], its a 64 bit register. I
think we should let Qemu/kvmtool specify any 64bit value here, but KVM should
reject values that try to set bits described as RES0.

This would let Qemu/kvmtool specify any SError ISS, either setting ESR_ELx.IDS
and some virtual-machine specific value, or encoding any severity in AET and
choosing the DFSC/EA bits appropriately.


>> > I think we need an API that allows Qemu/kvmtool to inject SError into a guest,
>> > but that isn't quite what you have here.

> KVM provides APIs to inject the SError, Qemu/kvmtool call the API though IOCTL, may be OK?

(just the one API call), yes.


Thanks,

James

[0]
https://static.docs.arm.com/ddi0587/a/RAS%20Extension-release%20candidate_march_29.pdf

^ permalink raw reply	[flat|nested] 31+ messages in thread

* [PATCH v3 2/3] arm64: kvm: inject SError with virtual syndrome
  2017-05-12 17:24       ` James Morse
@ 2017-05-21  9:08         ` gengdongjiu
  0 siblings, 0 replies; 31+ messages in thread
From: gengdongjiu @ 2017-05-21  9:08 UTC (permalink / raw)
  To: linux-arm-kernel

2017-05-13 1:24 GMT+08:00, James Morse <james.morse@arm.com>:
> Hi gengdongjiu,
>
> On 05/05/17 14:19, gengdongjiu wrote:
>> On 2017/5/2 23:37, James Morse wrote:
>> > ... I think you expect an SError to arrive at EL2 and have its ESR
>> > recorded in
>> > vcpu->arch.fault.vsesr_el2. Some time later KVM decides to inject an
>> > SError into
>> > the guest, and this ESR is reused...
>> >
>> > We shouldn't do this. Qemu/kvmtool may want to inject a virtual-SError
>> > that
>> > never started as a physical-SError. Qemu/kvmtool may choose to notify
>> > the guest
>> > of RAS events via another mechanism, or not at all.
>> >
>> > KVM should not give the guest an ESR value of its choice. For SError the
>> > ESR
>> > describes whether the error is corrected, correctable or fatal.
>> > Qemu/kvmtool
>> > must choose this.
>
>> Below is my previous solution:
>> For the SError, CPU will firstly trap to EL3 firmware and records the
>> syndrome to ESR_EL3.
>> Before jumping to El2 hypervisors, it will copy the esr_el3 to esr_el2.
>
> (Copying the ESR value won't always be the right thing to do.)
  James,  thanks for your kindly explanation, understand you  thought.

>
>
>> so in order to pass this syndrome to vsesr_el2, using the esr_el2 value to
>> assign it.
>
>> If Qemu/kvmtool chooses the ESR value and ESR only describes whether the
>> error is corrected/correctable/fatal,
>> whether the information is not enough for the guest?
>
> So the API should specify which of these three severities to use? I think
> this
> is too specific. The API should be useful for anything the VSE/VSESR
> hardware
> can do.
>
> VSESR_EL2 is described in the RAS spec: 4.4.12 [0], its a 64 bit register.
> I
> think we should let Qemu/kvmtool specify any 64bit value here, but KVM
> should
> reject values that try to set bits described as RES0.
>
> This would let Qemu/kvmtool specify any SError ISS, either setting
> ESR_ELx.IDS
> and some virtual-machine specific value, or encoding any severity in AET
> and
> choosing the DFSC/EA bits appropriately.
   it sounds reasonable

>
>
>>> > I think we need an API that allows Qemu/kvmtool to inject SError into a
>>> > guest,
>>> > but that isn't quite what you have here.
>
>> KVM provides APIs to inject the SError, Qemu/kvmtool call the API though
>> IOCTL, may be OK?
>
> (just the one API call), yes.
   Ok, have added.

>
>
> Thanks,
>
> James
>
> [0]
> https://static.docs.arm.com/ddi0587/a/RAS%20Extension-release%20candidate_march_29.pdf
>
> _______________________________________________
> kvmarm mailing list
> kvmarm at lists.cs.columbia.edu
> https://lists.cs.columbia.edu/mailman/listinfo/kvmarm
>

^ permalink raw reply	[flat|nested] 31+ messages in thread

* [PATCH v3 3/3] arm/arm64: signal SIBGUS and inject SEA Error
  2017-04-30  5:37 [PATCH v3 1/3] arm64: kvm: support kvmtool to detect RAS extension feature Dongjiu Geng
  2017-04-30  5:37 ` [PATCH v3 2/3] arm64: kvm: inject SError with virtual syndrome Dongjiu Geng
@ 2017-04-30  5:37 ` Dongjiu Geng
  2017-05-02 15:41   ` James Morse
  2017-05-02  7:56 ` [PATCH v3 1/3] arm64: kvm: support kvmtool to detect RAS extension feature Christoffer Dall
  2017-05-02 15:29 ` James Morse
  3 siblings, 1 reply; 31+ messages in thread
From: Dongjiu Geng @ 2017-04-30  5:37 UTC (permalink / raw)
  To: linux-arm-kernel

when happen SEA, deliver signal bus and handle the ioctl that
inject SEA abort to guest, so that guest can handle the SEA error.

Signed-off-by: Dongjiu Geng <gengdongjiu@huawei.com>
---
 arch/arm/include/asm/kvm_host.h   |  1 +
 arch/arm/kvm/arm.c                |  3 +++
 arch/arm/kvm/guest.c              |  5 +++++
 arch/arm/kvm/mmu.c                | 37 +++++++++++++++++++++++++++++++++++++
 arch/arm64/include/asm/kvm_host.h |  1 +
 arch/arm64/kvm/guest.c            |  7 +++++++
 include/uapi/linux/kvm.h          |  1 +
 7 files changed, 55 insertions(+)

diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 31ee468..ad19f80 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -244,6 +244,7 @@ int kvm_arm_coproc_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *);
 
 int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
 		int exception_index);
+int kvm_vcpu_ioctl_sea(struct kvm_vcpu *vcpu);
 
 static inline void __cpu_init_hyp_mode(phys_addr_t pgd_ptr,
 				       unsigned long hyp_stack_ptr,
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 96dba7c..907ba4a 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -972,6 +972,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
 			return -E2BIG;
 		return kvm_arm_copy_reg_indices(vcpu, user_list->reg);
 	}
+	case KVM_ARM_SEA: {
+		return kvm_vcpu_ioctl_sea(vcpu);
+	}
 	case KVM_SET_DEVICE_ATTR: {
 		if (copy_from_user(&attr, argp, sizeof(attr)))
 			return -EFAULT;
diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c
index fa6182a..48e5b53 100644
--- a/arch/arm/kvm/guest.c
+++ b/arch/arm/kvm/guest.c
@@ -247,6 +247,11 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
 {
 	return -EINVAL;
 }
+int kvm_vcpu_ioctl_sea(struct kvm_vcpu *vcpu)
+{
+	return 0;
+
+}
 
 int __attribute_const__ kvm_target_cpu(void)
 {
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index 105b6ab..a96594f 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -20,8 +20,10 @@
 #include <linux/kvm_host.h>
 #include <linux/io.h>
 #include <linux/hugetlb.h>
+#include <linux/sched/signal.h>
 #include <trace/events/kvm.h>
 #include <asm/pgalloc.h>
+#include <asm/siginfo.h>
 #include <asm/cacheflush.h>
 #include <asm/kvm_arm.h>
 #include <asm/kvm_mmu.h>
@@ -1238,6 +1240,36 @@ static void coherent_cache_guest_page(struct kvm_vcpu *vcpu, kvm_pfn_t pfn,
 	__coherent_cache_guest_page(vcpu, pfn, size);
 }
 
+static void kvm_send_signal(unsigned long address, bool hugetlb, bool hwpoison)
+{
+	siginfo_t info;
+
+	info.si_signo   = SIGBUS;
+	info.si_errno   = 0;
+	if (hwpoison)
+		info.si_code    = BUS_MCEERR_AR;
+	else
+		info.si_code    = 0;
+
+	info.si_addr    = (void __user *)address;
+	if (hugetlb)
+		info.si_addr_lsb = PMD_SHIFT;
+	else
+		info.si_addr_lsb = PAGE_SHIFT;
+
+	send_sig_info(SIGBUS, &info, current);
+}
+
+static void kvm_handle_bad_page(unsigned long address,
+					bool hugetlb, bool hwpoison)
+{
+	/* handle both hwpoison and other synchronous external Abort */
+	if (hwpoison)
+		kvm_send_signal(address, hugetlb, true);
+	else
+		kvm_send_signal(address, hugetlb, false);
+}
+
 static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 			  struct kvm_memory_slot *memslot, unsigned long hva,
 			  unsigned long fault_status)
@@ -1307,6 +1339,11 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 	smp_rmb();
 
 	pfn = gfn_to_pfn_prot(kvm, gfn, write_fault, &writable);
+	if (pfn == KVM_PFN_ERR_HWPOISON) {
+		kvm_handle_bad_page(hva, hugetlb, true);
+		return 0;
+	}
+
 	if (is_error_noslot_pfn(pfn))
 		return -EFAULT;
 
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 84ed239..4a80c3b 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -386,6 +386,7 @@ int kvm_arm_vcpu_arch_get_attr(struct kvm_vcpu *vcpu,
 			       struct kvm_device_attr *attr);
 int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu,
 			       struct kvm_device_attr *attr);
+int kvm_vcpu_ioctl_sea(struct kvm_vcpu *vcpu);
 
 static inline void __cpu_init_stage2(void)
 {
diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
index b37446a..780e3c4 100644
--- a/arch/arm64/kvm/guest.c
+++ b/arch/arm64/kvm/guest.c
@@ -277,6 +277,13 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
 	return -EINVAL;
 }
 
+int kvm_vcpu_ioctl_sea(struct kvm_vcpu *vcpu)
+{
+	kvm_inject_dabt(vcpu, kvm_vcpu_get_hfar(vcpu));
+
+	return 0;
+}
+
 int __attribute_const__ kvm_target_cpu(void)
 {
 	unsigned long implementor = read_cpuid_implementor();
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index bb02909..1d2e2e7 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -1306,6 +1306,7 @@ struct kvm_s390_ucas_mapping {
 #define KVM_S390_GET_IRQ_STATE	  _IOW(KVMIO, 0xb6, struct kvm_s390_irq_state)
 /* Available with KVM_CAP_X86_SMM */
 #define KVM_SMI                   _IO(KVMIO,   0xb7)
+#define KVM_ARM_SEA               _IO(KVMIO,   0xb8)
 
 #define KVM_DEV_ASSIGN_ENABLE_IOMMU	(1 << 0)
 #define KVM_DEV_ASSIGN_PCI_2_3		(1 << 1)
-- 
2.10.1

^ permalink raw reply related	[flat|nested] 31+ messages in thread

* [PATCH v3 3/3] arm/arm64: signal SIBGUS and inject SEA Error
  2017-04-30  5:37 ` [PATCH v3 3/3] arm/arm64: signal SIBGUS and inject SEA Error Dongjiu Geng
@ 2017-05-02 15:41   ` James Morse
  0 siblings, 0 replies; 31+ messages in thread
From: James Morse @ 2017-05-02 15:41 UTC (permalink / raw)
  To: linux-arm-kernel

Hi Dongjiu Geng,

On 30/04/17 06:37, Dongjiu Geng wrote:
> when happen SEA, deliver signal bus and handle the ioctl that
> inject SEA abort to guest, so that guest can handle the SEA error.

> diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
> index 105b6ab..a96594f 100644
> --- a/arch/arm/kvm/mmu.c
> +++ b/arch/arm/kvm/mmu.c
> @@ -20,8 +20,10 @@
> @@ -1238,6 +1240,36 @@ static void coherent_cache_guest_page(struct kvm_vcpu *vcpu, kvm_pfn_t pfn,
>  	__coherent_cache_guest_page(vcpu, pfn, size);
>  }
>  
> +static void kvm_send_signal(unsigned long address, bool hugetlb, bool hwpoison)
> +{
> +	siginfo_t info;
> +
> +	info.si_signo   = SIGBUS;
> +	info.si_errno   = 0;
> +	if (hwpoison)
> +		info.si_code    = BUS_MCEERR_AR;
> +	else
> +		info.si_code    = 0;
> +
> +	info.si_addr    = (void __user *)address;
> +	if (hugetlb)
> +		info.si_addr_lsb = PMD_SHIFT;
> +	else
> +		info.si_addr_lsb = PAGE_SHIFT;
> +
> +	send_sig_info(SIGBUS, &info, current);
> +}
> +

Punit reviewed the other version of this patch, this PMD_SHIFT is not the right
thing to do, it needs a more accurate set of calls and shifts as there may be
hugetlbfs pages other than PMD_SIZE.

https://www.spinics.net/lists/arm-kernel/msg568919.html

I haven't posted a new version of that patch because I was still hunting a bug
in the hugepage/hwpoison code, even with Punit's fixes series I see -EFAULT
returned to userspace instead of this hwpoison code being invoked.

Please avoid duplicating functionality between patches, it wastes reviewers
time, especially when we know there are problems with this approach.

> +static void kvm_handle_bad_page(unsigned long address,
> +					bool hugetlb, bool hwpoison)
> +{
> +	/* handle both hwpoison and other synchronous external Abort */
> +	if (hwpoison)
> +		kvm_send_signal(address, hugetlb, true);
> +	else
> +		kvm_send_signal(address, hugetlb, false);
> +}

Why the extra level of indirection? We only want to signal userspace like this
from KVM for hwpoison. Signals for RAS related reasons should come from the bits
of the kernel that decoded the error.

(hwpoison for KVM is a corner case as Qemu's memory effectively has two users,
Qemu and KVM. This isn't the example of how user-space gets signalled.)

> diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
> index b37446a..780e3c4 100644
> --- a/arch/arm64/kvm/guest.c
> +++ b/arch/arm64/kvm/guest.c
> @@ -277,6 +277,13 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
>  	return -EINVAL;
>  }
>  
> +int kvm_vcpu_ioctl_sea(struct kvm_vcpu *vcpu)
> +{
> +	kvm_inject_dabt(vcpu, kvm_vcpu_get_hfar(vcpu));
> +
> +	return 0;
> +}

> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> index bb02909..1d2e2e7 100644
> --- a/include/uapi/linux/kvm.h
> +++ b/include/uapi/linux/kvm.h
> @@ -1306,6 +1306,7 @@ struct kvm_s390_ucas_mapping {
>  #define KVM_S390_GET_IRQ_STATE	  _IOW(KVMIO, 0xb6, struct kvm_s390_irq_state)
>  /* Available with KVM_CAP_X86_SMM */
>  #define KVM_SMI                   _IO(KVMIO,   0xb7)
> +#define KVM_ARM_SEA               _IO(KVMIO,   0xb8)
>  
>  #define KVM_DEV_ASSIGN_ENABLE_IOMMU	(1 << 0)
>  #define KVM_DEV_ASSIGN_PCI_2_3		(1 << 1)
> 

Why do we need a userspace API for SEA? It can also be done by using
KVM_{G,S}ET_ONE_REG to change the vcpu registers. The advantage of doing it this
way is you can choose which ESR value to use.

Adding a new API call to do something you could do with an old one doesn't look
right.

Thanks,

James

^ permalink raw reply	[flat|nested] 31+ messages in thread

* [PATCH v3 1/3] arm64: kvm: support kvmtool to detect RAS extension feature
  2017-04-30  5:37 [PATCH v3 1/3] arm64: kvm: support kvmtool to detect RAS extension feature Dongjiu Geng
  2017-04-30  5:37 ` [PATCH v3 2/3] arm64: kvm: inject SError with virtual syndrome Dongjiu Geng
  2017-04-30  5:37 ` [PATCH v3 3/3] arm/arm64: signal SIBGUS and inject SEA Error Dongjiu Geng
@ 2017-05-02  7:56 ` Christoffer Dall
  2017-05-02 11:05   ` gengdongjiu
                     ` (2 more replies)
  2017-05-02 15:29 ` James Morse
  3 siblings, 3 replies; 31+ messages in thread
From: Christoffer Dall @ 2017-05-02  7:56 UTC (permalink / raw)
  To: linux-arm-kernel

Hi Dongjiu,

Please send a cover letter for patch series with more than a single
patch.

The subject and description of these patches are also misleading.
Hopefully this is in no way tied to kvmtool, but to userspace
generically, for example also to be used by QEMU?

On Sun, Apr 30, 2017 at 01:37:55PM +0800, Dongjiu Geng wrote:
> Handle kvmtool's detection for RAS extension, because sometimes
> the APP needs to know the CPU's capacity

the APP ?

the CPU's capacity?

> 
> Signed-off-by: Dongjiu Geng <gengdongjiu@huawei.com>
> ---
>  arch/arm64/kvm/reset.c   | 11 +++++++++++
>  include/uapi/linux/kvm.h |  1 +
>  2 files changed, 12 insertions(+)
> 
> diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
> index d9e9697..1004039 100644
> --- a/arch/arm64/kvm/reset.c
> +++ b/arch/arm64/kvm/reset.c
> @@ -64,6 +64,14 @@ static bool cpu_has_32bit_el1(void)
>  	return !!(pfr0 & 0x20);
>  }
>  
> +static bool kvm_arm_support_ras_extension(void)
> +{
> +	u64 pfr0;
> +
> +	pfr0 = read_system_reg(SYS_ID_AA64PFR0_EL1);
> +	return !!(pfr0 & 0x10000000);
> +}
> +
>  /**
>   * kvm_arch_dev_ioctl_check_extension
>   *
> @@ -87,6 +95,9 @@ int kvm_arch_dev_ioctl_check_extension(struct kvm *kvm, long ext)
>  	case KVM_CAP_ARM_PMU_V3:
>  		r = kvm_arm_support_pmu_v3();
>  		break;
> +	case KVM_CAP_ARM_RAS_EXTENSION:
> +		r = kvm_arm_support_ras_extension();
> +		break;

You need to document this capability and API in
Documentation/virtual/kvm/api.txt and explain how this works.



>  	case KVM_CAP_SET_GUEST_DEBUG:
>  	case KVM_CAP_VCPU_ATTRIBUTES:
>  		r = 1;
> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> index f51d508..27fe556 100644
> --- a/include/uapi/linux/kvm.h
> +++ b/include/uapi/linux/kvm.h
> @@ -883,6 +883,7 @@ struct kvm_ppc_resize_hpt {
>  #define KVM_CAP_PPC_MMU_RADIX 134
>  #define KVM_CAP_PPC_MMU_HASH_V3 135
>  #define KVM_CAP_IMMEDIATE_EXIT 136
> +#define KVM_CAP_ARM_RAS_EXTENSION 137
>  
>  #ifdef KVM_CAP_IRQ_ROUTING
>  
> -- 
> 2.10.1
> 

Thanks,
-Christoffer

^ permalink raw reply	[flat|nested] 31+ messages in thread

* [PATCH v3 1/3] arm64: kvm: support kvmtool to detect RAS extension feature
  2017-05-02  7:56 ` [PATCH v3 1/3] arm64: kvm: support kvmtool to detect RAS extension feature Christoffer Dall
@ 2017-05-02 11:05   ` gengdongjiu
  2017-05-02 12:15   ` gengdongjiu
  2017-05-02 15:48   ` Paolo Bonzini
  2 siblings, 0 replies; 31+ messages in thread
From: gengdongjiu @ 2017-05-02 11:05 UTC (permalink / raw)
  To: linux-arm-kernel

Hi Christoffer,
   thanks for your review and comments

On 2017/5/2 15:56, Christoffer Dall wrote:
> Hi Dongjiu,
> 
> Please send a cover letter for patch series with more than a single
> patch.
  OK, got it.

> 
> The subject and description of these patches are also misleading.
> Hopefully this is in no way tied to kvmtool, but to userspace
> generically, for example also to be used by QEMU?
 yes, it is also used by QEMU, it should be userspace.


> 
> On Sun, Apr 30, 2017 at 01:37:55PM +0800, Dongjiu Geng wrote:
>> Handle kvmtool's detection for RAS extension, because sometimes
>> the APP needs to know the CPU's capacity
> 
> the APP ?
> 
> the CPU's capacity?
  I will fix it.

> 
>>
>> Signed-off-by: Dongjiu Geng <gengdongjiu@huawei.com>
>> ---
>>  arch/arm64/kvm/reset.c   | 11 +++++++++++
>>  include/uapi/linux/kvm.h |  1 +
>>  2 files changed, 12 insertions(+)
>>
>> diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
>> index d9e9697..1004039 100644
>> --- a/arch/arm64/kvm/reset.c
>> +++ b/arch/arm64/kvm/reset.c
>> @@ -64,6 +64,14 @@ static bool cpu_has_32bit_el1(void)
>>  	return !!(pfr0 & 0x20);
>>  }
>>  
>> +static bool kvm_arm_support_ras_extension(void)
>> +{
>> +	u64 pfr0;
>> +
>> +	pfr0 = read_system_reg(SYS_ID_AA64PFR0_EL1);
>> +	return !!(pfr0 & 0x10000000);
>> +}
>> +
>>  /**
>>   * kvm_arch_dev_ioctl_check_extension
>>   *
>> @@ -87,6 +95,9 @@ int kvm_arch_dev_ioctl_check_extension(struct kvm *kvm, long ext)
>>  	case KVM_CAP_ARM_PMU_V3:
>>  		r = kvm_arm_support_pmu_v3();
>>  		break;
>> +	case KVM_CAP_ARM_RAS_EXTENSION:
>> +		r = kvm_arm_support_ras_extension();
>> +		break;
> 
> You need to document this capability and API in
> Documentation/virtual/kvm/api.txt and explain how this works.
  Ok, thanks for your suggestion.

> 
> 
> 
>>  	case KVM_CAP_SET_GUEST_DEBUG:
>>  	case KVM_CAP_VCPU_ATTRIBUTES:
>>  		r = 1;
>> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
>> index f51d508..27fe556 100644
>> --- a/include/uapi/linux/kvm.h
>> +++ b/include/uapi/linux/kvm.h
>> @@ -883,6 +883,7 @@ struct kvm_ppc_resize_hpt {
>>  #define KVM_CAP_PPC_MMU_RADIX 134
>>  #define KVM_CAP_PPC_MMU_HASH_V3 135
>>  #define KVM_CAP_IMMEDIATE_EXIT 136
>> +#define KVM_CAP_ARM_RAS_EXTENSION 137
>>  
>>  #ifdef KVM_CAP_IRQ_ROUTING
>>  
>> -- 
>> 2.10.1
>>
> 
> Thanks,
> -Christoffer
> 
> .
> 

^ permalink raw reply	[flat|nested] 31+ messages in thread

* [PATCH v3 1/3] arm64: kvm: support kvmtool to detect RAS extension feature
  2017-05-02  7:56 ` [PATCH v3 1/3] arm64: kvm: support kvmtool to detect RAS extension feature Christoffer Dall
  2017-05-02 11:05   ` gengdongjiu
@ 2017-05-02 12:15   ` gengdongjiu
  2017-05-02 15:48   ` Paolo Bonzini
  2 siblings, 0 replies; 31+ messages in thread
From: gengdongjiu @ 2017-05-02 12:15 UTC (permalink / raw)
  To: linux-arm-kernel

Hi Christoffer,
   thanks for your review and comments.

On 2017/5/2 15:56, Christoffer Dall wrote:
> Hi Dongjiu,
> 
> Please send a cover letter for patch series with more than a single
> patch.
 OK, got it.

> 
> The subject and description of these patches are also misleading.
> Hopefully this is in no way tied to kvmtool, but to userspace
> generically, for example also to be used by QEMU?
> 
> On Sun, Apr 30, 2017 at 01:37:55PM +0800, Dongjiu Geng wrote:
>> Handle kvmtool's detection for RAS extension, because sometimes
>> the APP needs to know the CPU's capacity
> 
> the APP ?
> 
> the CPU's capacity?
I will fix it.

> 
>>
>> Signed-off-by: Dongjiu Geng <gengdongjiu@huawei.com>
>> ---
>>  arch/arm64/kvm/reset.c   | 11 +++++++++++
>>  include/uapi/linux/kvm.h |  1 +
>>  2 files changed, 12 insertions(+)
>>
>> diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
>> index d9e9697..1004039 100644
>> --- a/arch/arm64/kvm/reset.c
>> +++ b/arch/arm64/kvm/reset.c
>> @@ -64,6 +64,14 @@ static bool cpu_has_32bit_el1(void)
>>  	return !!(pfr0 & 0x20);
>>  }
>>  
>> +static bool kvm_arm_support_ras_extension(void)
>> +{
>> +	u64 pfr0;
>> +
>> +	pfr0 = read_system_reg(SYS_ID_AA64PFR0_EL1);
>> +	return !!(pfr0 & 0x10000000);
>> +}
>> +
>>  /**
>>   * kvm_arch_dev_ioctl_check_extension
>>   *
>> @@ -87,6 +95,9 @@ int kvm_arch_dev_ioctl_check_extension(struct kvm *kvm, long ext)
>>  	case KVM_CAP_ARM_PMU_V3:
>>  		r = kvm_arm_support_pmu_v3();
>>  		break;
>> +	case KVM_CAP_ARM_RAS_EXTENSION:
>> +		r = kvm_arm_support_ras_extension();
>> +		break;
> 
> You need to document this capability and API in
> Documentation/virtual/kvm/api.txt and explain how this works.
 Ok, thanks for your suggestion.

> 
> 
> 
>>  	case KVM_CAP_SET_GUEST_DEBUG:
>>  	case KVM_CAP_VCPU_ATTRIBUTES:
>>  		r = 1;
>> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
>> index f51d508..27fe556 100644
>> --- a/include/uapi/linux/kvm.h
>> +++ b/include/uapi/linux/kvm.h
>> @@ -883,6 +883,7 @@ struct kvm_ppc_resize_hpt {
>>  #define KVM_CAP_PPC_MMU_RADIX 134
>>  #define KVM_CAP_PPC_MMU_HASH_V3 135
>>  #define KVM_CAP_IMMEDIATE_EXIT 136
>> +#define KVM_CAP_ARM_RAS_EXTENSION 137
>>  
>>  #ifdef KVM_CAP_IRQ_ROUTING
>>  
>> -- 
>> 2.10.1
>>
> 
> Thanks,
> -Christoffer
> 
> .
> 

^ permalink raw reply	[flat|nested] 31+ messages in thread

* [PATCH v3 1/3] arm64: kvm: support kvmtool to detect RAS extension feature
  2017-05-02  7:56 ` [PATCH v3 1/3] arm64: kvm: support kvmtool to detect RAS extension feature Christoffer Dall
  2017-05-02 11:05   ` gengdongjiu
  2017-05-02 12:15   ` gengdongjiu
@ 2017-05-02 15:48   ` Paolo Bonzini
  2017-05-04  8:19     ` James Morse
  2 siblings, 1 reply; 31+ messages in thread
From: Paolo Bonzini @ 2017-05-02 15:48 UTC (permalink / raw)
  To: linux-arm-kernel



On 02/05/2017 09:56, Christoffer Dall wrote:
> Hi Dongjiu,
> 
> Please send a cover letter for patch series with more than a single
> patch.
> 
> The subject and description of these patches are also misleading.
> Hopefully this is in no way tied to kvmtool, but to userspace
> generically, for example also to be used by QEMU?

Yes, QEMU already has a similar capability on x86.

Does ARM support background scrubbing of memory to detect errors?  If
so, are there any plans to support action-optional SIGBUS on ARM?

Paolo

> On Sun, Apr 30, 2017 at 01:37:55PM +0800, Dongjiu Geng wrote:
>> Handle kvmtool's detection for RAS extension, because sometimes
>> the APP needs to know the CPU's capacity
> 
> the APP ?
> 
> the CPU's capacity?
> 
>>
>> Signed-off-by: Dongjiu Geng <gengdongjiu@huawei.com>
>> ---
>>  arch/arm64/kvm/reset.c   | 11 +++++++++++
>>  include/uapi/linux/kvm.h |  1 +
>>  2 files changed, 12 insertions(+)
>>
>> diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
>> index d9e9697..1004039 100644
>> --- a/arch/arm64/kvm/reset.c
>> +++ b/arch/arm64/kvm/reset.c
>> @@ -64,6 +64,14 @@ static bool cpu_has_32bit_el1(void)
>>  	return !!(pfr0 & 0x20);
>>  }
>>  
>> +static bool kvm_arm_support_ras_extension(void)
>> +{
>> +	u64 pfr0;
>> +
>> +	pfr0 = read_system_reg(SYS_ID_AA64PFR0_EL1);
>> +	return !!(pfr0 & 0x10000000);
>> +}
>> +
>>  /**
>>   * kvm_arch_dev_ioctl_check_extension
>>   *
>> @@ -87,6 +95,9 @@ int kvm_arch_dev_ioctl_check_extension(struct kvm *kvm, long ext)
>>  	case KVM_CAP_ARM_PMU_V3:
>>  		r = kvm_arm_support_pmu_v3();
>>  		break;
>> +	case KVM_CAP_ARM_RAS_EXTENSION:
>> +		r = kvm_arm_support_ras_extension();
>> +		break;
> 
> You need to document this capability and API in
> Documentation/virtual/kvm/api.txt and explain how this works.
> 
> 
> 
>>  	case KVM_CAP_SET_GUEST_DEBUG:
>>  	case KVM_CAP_VCPU_ATTRIBUTES:
>>  		r = 1;
>> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
>> index f51d508..27fe556 100644
>> --- a/include/uapi/linux/kvm.h
>> +++ b/include/uapi/linux/kvm.h
>> @@ -883,6 +883,7 @@ struct kvm_ppc_resize_hpt {
>>  #define KVM_CAP_PPC_MMU_RADIX 134
>>  #define KVM_CAP_PPC_MMU_HASH_V3 135
>>  #define KVM_CAP_IMMEDIATE_EXIT 136
>> +#define KVM_CAP_ARM_RAS_EXTENSION 137
>>  
>>  #ifdef KVM_CAP_IRQ_ROUTING
>>  
>> -- 
>> 2.10.1
>>
> 
> Thanks,
> -Christoffer
> 

^ permalink raw reply	[flat|nested] 31+ messages in thread

* [PATCH v3 1/3] arm64: kvm: support kvmtool to detect RAS extension feature
  2017-05-02 15:48   ` Paolo Bonzini
@ 2017-05-04  8:19     ` James Morse
  0 siblings, 0 replies; 31+ messages in thread
From: James Morse @ 2017-05-04  8:19 UTC (permalink / raw)
  To: linux-arm-kernel

Hi Paolo,

On 02/05/17 16:48, Paolo Bonzini wrote:
> On 02/05/2017 09:56, Christoffer Dall wrote:
>> The subject and description of these patches are also misleading.
>> Hopefully this is in no way tied to kvmtool, but to userspace
>> generically, for example also to be used by QEMU?
> 
> Yes, QEMU already has a similar capability on x86.
> 
> Does ARM support background scrubbing of memory to detect errors? 

As part of RAS support, yes. A way for firmware to notify the OS about these
events was recently added to the ACPI specification.
We are aiming to turn on ARCH_SUPPORTS_MEMORY_FAILURE which does the Linux end
of things. Punit has a series here:
https://www.spinics.net/lists/arm-kernel/msg575944.html

> If
> so, are there any plans to support action-optional SIGBUS on ARM?

It looks like ARCH_SUPPORTS_MEMORY_FAILURE will bring that in, so yes.

Thanks,

James

^ permalink raw reply	[flat|nested] 31+ messages in thread

* [PATCH v3 1/3] arm64: kvm: support kvmtool to detect RAS extension feature
  2017-04-30  5:37 [PATCH v3 1/3] arm64: kvm: support kvmtool to detect RAS extension feature Dongjiu Geng
                   ` (2 preceding siblings ...)
  2017-05-02  7:56 ` [PATCH v3 1/3] arm64: kvm: support kvmtool to detect RAS extension feature Christoffer Dall
@ 2017-05-02 15:29 ` James Morse
  2017-05-04 15:49   ` James Morse
  2017-06-26  5:22   ` gengdongjiu
  3 siblings, 2 replies; 31+ messages in thread
From: James Morse @ 2017-05-02 15:29 UTC (permalink / raw)
  To: linux-arm-kernel

Hi Dongjiu Geng,

On 30/04/17 06:37, Dongjiu Geng wrote:
> Handle kvmtool's detection for RAS extension, because sometimes
> the APP needs to know the CPU's capacity

> diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
> index d9e9697..1004039 100644
> --- a/arch/arm64/kvm/reset.c
> +++ b/arch/arm64/kvm/reset.c
> @@ -64,6 +64,14 @@ static bool cpu_has_32bit_el1(void)
>  	return !!(pfr0 & 0x20);
>  }
>  
> +static bool kvm_arm_support_ras_extension(void)
> +{
> +	u64 pfr0;
> +
> +	pfr0 = read_system_reg(SYS_ID_AA64PFR0_EL1);
> +	return !!(pfr0 & 0x10000000);
> +}

Why are we telling user-space that the CPU has RAS extensions? EL0 can't do
anything with this and the guest EL1 can detect it from the id registers.

Are you using this to decide whether or not to generate a HEST for the guest?

If Qemu/kvmtool supports handling memory-failure notifications from signals you
should always generate a HEST. The GHES notification method could be anything
Qemu can deliver to the guest using the KVM APIs. Notifications from Qemu to the
guest don't depend on the RAS extensions. KVM has APIs for IRQ and SEA (you can
use KVM_SET_ONE_REG).

I think we need a new API for injecting SError for SEI from Qemu/kvmtool, but it
shouldn't be related to the RAS extensions. All v8.0 CPUs have HCR_EL2.VSE, so
we need to know KVM supports this API.

Your later patch adds code to set VSESR to make virtual RAS SErrors work, I
think we need to expose that to user-space.

Thanks,

James

^ permalink raw reply	[flat|nested] 31+ messages in thread

* [PATCH v3 1/3] arm64: kvm: support kvmtool to detect RAS extension feature
  2017-05-02 15:29 ` James Morse
@ 2017-05-04 15:49   ` James Morse
  2017-05-05 12:44     ` gengdongjiu
  2017-06-26  5:22   ` gengdongjiu
  1 sibling, 1 reply; 31+ messages in thread
From: James Morse @ 2017-05-04 15:49 UTC (permalink / raw)
  To: linux-arm-kernel

Hi Dongjiu Geng,

On 02/05/17 16:29, James Morse wrote:
> I think we need a new API for injecting SError for SEI from Qemu/kvmtool, but it
> shouldn't be related to the RAS extensions. All v8.0 CPUs have HCR_EL2.VSE, so
> we need to know KVM supports this API.

Thinking about this some more, it is slightly more nuanced, KVM can always
provide an API to inject SError, but it can only set the VSESR if the CPU has
the RAS Extensions.

Only offering the inject-SError API call if we can also set the VSESR looks a
bit funny, but no-one has needed the no-ESR version so far.

I still don't think we should let user-space make the 'RAS Extensions means
VSESR' logical step.

So my comments on this patch become:
Don't read id registers directly, use cpus_have_cap() which handles features
that differ across CPUs or were turned off at compile time.

Please don't call this 'KVM_CAP_ARM_RAS_EXTENSION', if we are building an API to
inject SError, call it that instead.

Thanks,

James

^ permalink raw reply	[flat|nested] 31+ messages in thread

* [PATCH v3 1/3] arm64: kvm: support kvmtool to detect RAS extension feature
  2017-05-04 15:49   ` James Morse
@ 2017-05-05 12:44     ` gengdongjiu
  0 siblings, 0 replies; 31+ messages in thread
From: gengdongjiu @ 2017-05-05 12:44 UTC (permalink / raw)
  To: linux-arm-kernel

Hi James,
     Thanks a lot for your comments.

2017-05-04 23:49 GMT+08:00 James Morse <james.morse@arm.com>:
> Hi Dongjiu Geng,
>
> On 02/05/17 16:29, James Morse wrote:
>> I think we need a new API for injecting SError for SEI from Qemu/kvmtool, but it
>> shouldn't be related to the RAS extensions. All v8.0 CPUs have HCR_EL2.VSE, so
>> we need to know KVM supports this API.
>
> Thinking about this some more, it is slightly more nuanced, KVM can always
> provide an API to inject SError, but it can only set the VSESR if the CPU has
> the RAS Extensions.

James, do you mean we need to add a new API instead of adding the
VSESR in the old API kvm_inject_vabt?

 /**
  * kvm_inject_vabt - inject an async abort / SError into the guest
  * @vcpu: The VCPU to receive the exception
  *
  * It is assumed that this code is called from the VCPU thread and that the
  * VCPU therefore is not currently executing guest code.
  */
 void kvm_inject_vabt(struct kvm_vcpu *vcpu)
 {
     vcpu_set_hcr(vcpu, vcpu_get_hcr(vcpu) | HCR_VSE);
 }



>
> Only offering the inject-SError API call if we can also set the VSESR looks a
> bit funny, but no-one has needed the no-ESR version so far.
>
> I still don't think we should let user-space make the 'RAS Extensions means
> VSESR' logical step.

 Ok. got it.

>
> So my comments on this patch become:
> Don't read id registers directly, use cpus_have_cap() which handles features
> that differ across CPUs or were turned off at compile time.
>
> Please don't call this 'KVM_CAP_ARM_RAS_EXTENSION', if we are building an API to
> inject SError, call it that instead.

Ok, thanks for your suggestion.

>
>
> Thanks,
>
> James
> _______________________________________________
> kvmarm mailing list
> kvmarm at lists.cs.columbia.edu
> https://lists.cs.columbia.edu/mailman/listinfo/kvmarm

^ permalink raw reply	[flat|nested] 31+ messages in thread

* [PATCH v3 1/3] arm64: kvm: support kvmtool to detect RAS extension feature
  2017-05-02 15:29 ` James Morse
  2017-05-04 15:49   ` James Morse
@ 2017-06-26  5:22   ` gengdongjiu
  1 sibling, 0 replies; 31+ messages in thread
From: gengdongjiu @ 2017-06-26  5:22 UTC (permalink / raw)
  To: linux-arm-kernel

Hi James,

   I have changed the SEA/SEI injection method according you suggestion, but I think this patch may also be needed.
Now for the  SEI, the virtual ESR value is specified by the userspace. only RAS extension support to set the virtual ESR value. so user space will check it to decide whether pass the virtual ESR value.
At the same time, reserve this interface for other possible usage by user space. what do you think about this patch?



On 2017/5/2 23:29, James Morse wrote:
> Hi Dongjiu Geng,
> 
> On 30/04/17 06:37, Dongjiu Geng wrote:
>> Handle kvmtool's detection for RAS extension, because sometimes
>> the APP needs to know the CPU's capacity
> 
>> diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
>> index d9e9697..1004039 100644
>> --- a/arch/arm64/kvm/reset.c
>> +++ b/arch/arm64/kvm/reset.c
>> @@ -64,6 +64,14 @@ static bool cpu_has_32bit_el1(void)
>>  	return !!(pfr0 & 0x20);
>>  }
>>  
>> +static bool kvm_arm_support_ras_extension(void)
>> +{
>> +	u64 pfr0;
>> +
>> +	pfr0 = read_system_reg(SYS_ID_AA64PFR0_EL1);
>> +	return !!(pfr0 & 0x10000000);
>> +}
> 
> Why are we telling user-space that the CPU has RAS extensions? EL0 can't do
> anything with this and the guest EL1 can detect it from the id registers.
> 
> 
> Are you using this to decide whether or not to generate a HEST for the guest?
> 
> If Qemu/kvmtool supports handling memory-failure notifications from signals you
> should always generate a HEST. The GHES notification method could be anything
> Qemu can deliver to the guest using the KVM APIs. Notifications from Qemu to the
> guest don't depend on the RAS extensions. KVM has APIs for IRQ and SEA (you can
> use KVM_SET_ONE_REG).
> 
> 
> I think we need a new API for injecting SError for SEI from Qemu/kvmtool, but it
> shouldn't be related to the RAS extensions. All v8.0 CPUs have HCR_EL2.VSE, so
> we need to know KVM supports this API.
> 
> Your later patch adds code to set VSESR to make virtual RAS SErrors work, I
> think we need to expose that to user-space.
> 
> 
> Thanks,
> 
> James
> .
> 

^ permalink raw reply	[flat|nested] 31+ messages in thread

[parent not found: <CAMj-D2BGTDKpOcMu2ip41_MTTj8VDwvs59Ds7yvLHcD8PeQzhg@mail.gmail.com>]

* [PATCH v3 3/3] arm/arm64: signal SIBGUS and inject SEA Error
       [not found] <CAMj-D2BGTDKpOcMu2ip41_MTTj8VDwvs59Ds7yvLHcD8PeQzhg@mail.gmail.com>
@ 2017-05-05 12:31 ` gengdongjiu
  2017-05-12 17:24   ` James Morse
  2017-05-08 17:28 ` James Morse
  1 sibling, 1 reply; 31+ messages in thread
From: gengdongjiu @ 2017-05-05 12:31 UTC (permalink / raw)
  To: linux-arm-kernel

HI James,

2017-05-05 0:52 GMT+08:00 gengdongjiu <gengdj.1984@gmail.com>:
> Dear James,
>    Thanks a lot for your review and comments. I am very sorry for the
> late response.
>
>
> 2017-05-04 23:42 GMT+08:00 gengdongjiu <gengdj.1984@gmail.com>:
>>  Hi Dongjiu Geng,
>>
>> On 30/04/17 06:37, Dongjiu Geng wrote:
>>> when happen SEA, deliver signal bus and handle the ioctl that
>>> inject SEA abort to guest, so that guest can handle the SEA error.
>>
>>> diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
>>> index 105b6ab..a96594f 100644
>>> --- a/arch/arm/kvm/mmu.c
>>> +++ b/arch/arm/kvm/mmu.c
>>> @@ -20,8 +20,10 @@
>>> @@ -1238,6 +1240,36 @@ static void coherent_cache_guest_page(struct kvm_vcpu *vcpu, kvm_pfn_t pfn,
>>>   __coherent_cache_guest_page(vcpu, pfn, size);
>>>  }
>>>
>>> +static void kvm_send_signal(unsigned long address, bool hugetlb, bool hwpoison)
>>> +{
>>> + siginfo_t info;
>>> +
>>> + info.si_signo   = SIGBUS;
>>> + info.si_errno   = 0;
>>> + if (hwpoison)
>>> + info.si_code    = BUS_MCEERR_AR;
>>> + else
>>> + info.si_code    = 0;
>>> +
>>> + info.si_addr    = (void __user *)address;
>>> + if (hugetlb)
>>> + info.si_addr_lsb = PMD_SHIFT;
>>> + else
>>> + info.si_addr_lsb = PAGE_SHIFT;
>>> +
>>> + send_sig_info(SIGBUS, &info, current);
>>> +}
>>> +
>> ?  [hide part of quote]
>>
>> Punit reviewed the other version of this patch, this PMD_SHIFT is not the right
>> thing to do, it needs a more accurate set of calls and shifts as there may be
>> hugetlbfs pages other than PMD_SIZE.
>>
>> https://www.spinics.net/lists/arm-kernel/msg568919.html
>>
>> I haven't posted a new version of that patch because I was still hunting a bug
>> in the hugepage/hwpoison code, even with Punit's fixes series I see -EFAULT
>> returned to userspace instead of this hwpoison code being invoked.
>
>   Ok, got it, thanks for your information.
>>
>> Please avoid duplicating functionality between patches, it wastes reviewers
>> time, especially when we know there are problems with this approach.
>>
>>
>>> +static void kvm_handle_bad_page(unsigned long address,
>>> + bool hugetlb, bool hwpoison)
>>> +{
>>> + /* handle both hwpoison and other synchronous external Abort */
>>> + if (hwpoison)
>>> + kvm_send_signal(address, hugetlb, true);
>>> + else
>>> + kvm_send_signal(address, hugetlb, false);
>>> +}
>>
>> Why the extra level of indirection? We only want to signal userspace like this
>> from KVM for hwpoison. Signals for RAS related reasons should come from the bits
>> of the kernel that decoded the error.
>
> For the SEA, the are maily two types:
> 0b010000 Synchronous External Abort on memory access.
> 0b0101xx Synchronous External Abort on page table walk. DFSC[1:0]
> encode the level.
>
> hwpoison should belong to the  "Synchronous External Abort on memory access"
> if the SEA type is not hwpoison, such as page table walk, do you mean
> KVM do not deliver the SIGBUS?
> If so, how the KVM handle the SEA type other than hwpoison?
>
>>
>> (hwpoison for KVM is a corner case as Qemu's memory effectively has two users,
>> Qemu and KVM. This isn't the example of how user-space gets signalled.)
>>
>>
>>> diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
>>> index b37446a..780e3c4 100644
>>> --- a/arch/arm64/kvm/guest.c
>>> +++ b/arch/arm64/kvm/guest.c
>>> @@ -277,6 +277,13 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
>>>   return -EINVAL;
>>>  }
>>>
>>> +int kvm_vcpu_ioctl_sea(struct kvm_vcpu *vcpu)
>>> +{
>>> + kvm_inject_dabt(vcpu, kvm_vcpu_get_hfar(vcpu));
>>> +
>>> + return 0;
>>> +}
>>
>>> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
>>> index bb02909..1d2e2e7 100644
>>> --- a/include/uapi/linux/kvm.h
>>> +++ b/include/uapi/linux/kvm.h
>>> @@ -1306,6 +1306,7 @@ struct kvm_s390_ucas_mapping {
>>>  #define KVM_S390_GET_IRQ_STATE  _IOW(KVMIO, 0xb6, struct kvm_s390_irq_state)
>>>  /* Available with KVM_CAP_X86_SMM */
>>>  #define KVM_SMI                   _IO(KVMIO,   0xb7)
>>> +#define KVM_ARM_SEA               _IO(KVMIO,   0xb8)
>>>
>>>  #define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0)
>>>  #define KVM_DEV_ASSIGN_PCI_2_3 (1 << 1)
>>>
>>
>> Why do we need a userspace API for SEA? It can also be done by using
>> KVM_{G,S}ET_ONE_REG to change the vcpu registers. The advantage of doing it this
>> way is you can choose which ESR value to use.
>>
>> Adding a new API call to do something you could do with an old one doesn't look
>> right.
>
> James, I considered your suggestion before that use the
> KVM_{G,S}ET_ONE_REG to change the vcpu registers. but I found it does
> not have difference to use the alread existed KVM API.  so may be
> changing the vcpu registers in qemu will duplicate with the KVM APIs.
>
> injection a SEA is no more than setting some registers: elr_el1, PC,
> PSTATE, SPSR_el1, far_el1, esr_el1
> I seen this KVM API do the same thing as Qemu.  do you found call this
> API will have issue and necessary to choose another ESR value?

I consider again, you are right.

when guest OS happen an SEA, My current solution is shown below:

(1) host EL3 firmware firstly handle the SEA error and generate the CPER record.
(2) EL3 firmware separately copy the esr_el3, elr_el3, SPSR_el3,
far_el3 to the esr_el2, elr_el2, SPSR_el2, far_el2.
(3) then jump the EL2 hypervisor

so the EL2 hypervisor uses the ESR that come from esr_el3,  here the
ESR(esr_el3) value may be different with the exist KVM API's ESR.


>
> I pasted the alread existed KVM API code:
>
> static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned
> long addr)
> {
>  unsigned long cpsr = *vcpu_cpsr(vcpu);
>  bool is_aarch32 = vcpu_mode_is_32bit(vcpu);
>  u32 esr = 0;
>  *vcpu_elr_el1(vcpu) = *vcpu_pc(vcpu);
>  *vcpu_pc(vcpu) = get_except_vector(vcpu, except_type_sync);
>  *vcpu_cpsr(vcpu) = PSTATE_FAULT_BITS_64;
>  *vcpu_spsr(vcpu) = cpsr;
>  vcpu_sys_reg(vcpu, FAR_EL1) = addr;
>  /*
>   * Build an {i,d}abort, depending on the level and the
>   * instruction set. Report an external synchronous abort.
>   */
>  if (kvm_vcpu_trap_il_is32bit(vcpu))
>   esr |= ESR_ELx_IL;
>  /*
>   * Here, the guest runs in AArch64 mode when in EL1. If we get
>   * an AArch32 fault, it means we managed to trap an EL0 fault.
>   */
>  if (is_aarch32 || (cpsr & PSR_MODE_MASK) == PSR_MODE_EL0t)
>   esr |= (ESR_ELx_EC_IABT_LOW << ESR_ELx_EC_SHIFT);
>  else
>   esr |= (ESR_ELx_EC_IABT_CUR << ESR_ELx_EC_SHIFT);
>  if (!is_iabt)
>   esr |= ESR_ELx_EC_DABT_LOW << ESR_ELx_EC_SHIFT;
>  vcpu_sys_reg(vcpu, ESR_EL1) = esr | ESR_ELx_FSC_EXTABT;
> }
>
> static void inject_abt32(struct kvm_vcpu *vcpu, bool is_pabt,
>     unsigned long addr)
> {
>  u32 vect_offset;
>  u32 *far, *fsr;
>  bool is_lpae;
>  if (is_pabt) {
>   vect_offset = 12;
>   far = &vcpu_cp15(vcpu, c6_IFAR);
>   fsr = &vcpu_cp15(vcpu, c5_IFSR);
>  } else { /* !iabt */
>   vect_offset = 16;
>   far = &vcpu_cp15(vcpu, c6_DFAR);
>   fsr = &vcpu_cp15(vcpu, c5_DFSR);
>  }
>  prepare_fault32(vcpu, COMPAT_PSR_MODE_ABT | COMPAT_PSR_A_BIT, vect_offset);
>  *far = addr;
>  /* Give the guest an IMPLEMENTATION DEFINED exception */
>  is_lpae = (vcpu_cp15(vcpu, c2_TTBCR) >> 31);
>  if (is_lpae)
>   *fsr = 1 << 9 | 0x34;
>  else
>   *fsr = 0x14;
> }
>
>
> /**
>  * kvm_inject_dabt - inject a data abort into the guest
>  * @vcpu: The VCPU to receive the undefined exception
>  * @addr: The address to report in the DFAR
>  *
>  * It is assumed that this code is called from the VCPU thread and that the
>  * VCPU therefore is not currently executing guest code.
>  */
> void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr)
> {
>  if (!(vcpu->arch.hcr_el2 & HCR_RW))
>   inject_abt32(vcpu, false, addr);
>  else
>   inject_abt64(vcpu, false, addr);
> }
>
>
>>
>>
>> Thanks,
>>
>> James

^ permalink raw reply	[flat|nested] 31+ messages in thread

* [PATCH v3 3/3] arm/arm64: signal SIBGUS and inject SEA Error
  2017-05-05 12:31 ` [PATCH v3 3/3] arm/arm64: signal SIBGUS and inject SEA Error gengdongjiu
@ 2017-05-12 17:24   ` James Morse
  2017-05-21  8:24     ` gengdongjiu
  0 siblings, 1 reply; 31+ messages in thread
From: James Morse @ 2017-05-12 17:24 UTC (permalink / raw)
  To: linux-arm-kernel

Hi gengdongjiu,

On 05/05/17 13:31, gengdongjiu wrote:
> when guest OS happen an SEA, My current solution is shown below:
> 
> (1) host EL3 firmware firstly handle the SEA error and generate the CPER record.
> (2) EL3 firmware separately copy the esr_el3, elr_el3, SPSR_el3,
> far_el3 to the esr_el2, elr_el2, SPSR_el2, far_el2.

Copying {ELR,SPSR,FAR}_EL3 to the EL2 registers rings some alarm bells: I'm sure
you exclude values from EL3 or the secure-world, we should never hand those to
the normal world.

> (3) then jump the EL2 hypervisor

> so the EL2 hypervisor uses the ESR that come from esr_el3,  here the
> ESR(esr_el3) value may be different with the exist KVM API's ESR.

The ESR may be different between EL3 and EL2. The ESR contains the severity of
the event, the CPU will choose this when it takes the SError to EL3. If it had
taken the SError to EL2, the CPU may have classified the error differently.

Firmware may need to generate a more severe ESR if it receives an error that
would be propagated by delivering SEI to a lower exception level, for example if
an EL2 system register is 'infected'.

This is the same for Qemu/kvmtool. A contained error at EL2 may be an
uncontained error if we hand it to guest EL1. Linux's RAS code will decide this
with its choice of signal to send, (and possibly which code to set).
Qemu/kvmtool need to choose an appropriate APEI notification, which may involve
generating a relevant ESR.

Also relevant is the problem we discussed earlier with trying to deliver fake
Physical-SError from software at EL3: If the SError is routed to EL2, and EL2
has PSTATE.A masked, EL3 has to wait and try again later. This is another case
where firmware may have to upgrade the classification of an error to uncontainable.

Thanks,

James

^ permalink raw reply	[flat|nested] 31+ messages in thread

* [PATCH v3 3/3] arm/arm64: signal SIBGUS and inject SEA Error
  2017-05-12 17:24   ` James Morse
@ 2017-05-21  8:24     ` gengdongjiu
  0 siblings, 0 replies; 31+ messages in thread
From: gengdongjiu @ 2017-05-21  8:24 UTC (permalink / raw)
  To: linux-arm-kernel

Hi James,
    sorry for the late response due to recently verify and debug the
RAS solution.

2017-05-13 1:24 GMT+08:00, James Morse <james.morse@arm.com>:
> Hi gengdongjiu,
>
> On 05/05/17 13:31, gengdongjiu wrote:
>> when guest OS happen an SEA, My current solution is shown below:
>>
>> (1) host EL3 firmware firstly handle the SEA error and generate the CPER
>> record.
>> (2) EL3 firmware separately copy the esr_el3, elr_el3, SPSR_el3,
>> far_el3 to the esr_el2, elr_el2, SPSR_el2, far_el2.
>
> Copying {ELR,SPSR,FAR}_EL3 to the EL2 registers rings some alarm bells: I'm
> sure
> you exclude values from EL3 or the secure-world, we should never hand those
> to
> the normal world.

 it is sure that needs to  exclude the EL3 Error and secure-world.

>
>
>> (3) then jump the EL2 hypervisor
>
>> so the EL2 hypervisor uses the ESR that come from esr_el3,  here the
>> ESR(esr_el3) value may be different with the exist KVM API's ESR.
>
> The ESR may be different between EL3 and EL2. The ESR contains the severity
> of
> the event, the CPU will choose this when it takes the SError to EL3. If it
> had
> taken the SError to EL2, the CPU may have classified the error differently.
>
> Firmware may need to generate a more severe ESR if it receives an error
> that
> would be propagated by delivering SEI to a lower exception level, for
> example if
> an EL2 system register is 'infected'.
>
> This is the same for Qemu/kvmtool. A contained error at EL2 may be an
> uncontained error if we hand it to guest EL1. Linux's RAS code will decide
> this
> with its choice of signal to send, (and possibly which code to set).
> Qemu/kvmtool need to choose an appropriate APEI notification, which may
> involve
> generating a relevant ESR.
>
> Also relevant is the problem we discussed earlier with trying to deliver
> fake
> Physical-SError from software at EL3: If the SError is routed to EL2, and
> EL2
> has PSTATE.A masked, EL3 has to wait and try again later. This is another
> case
> where firmware may have to upgrade the classification of an error to
> uncontainable.
   it makes sense. thanks to James.

>
>
> Thanks,
>
> James
>

^ permalink raw reply	[flat|nested] 31+ messages in thread

* [PATCH v3 3/3] arm/arm64: signal SIBGUS and inject SEA Error
       [not found] <CAMj-D2BGTDKpOcMu2ip41_MTTj8VDwvs59Ds7yvLHcD8PeQzhg@mail.gmail.com>
  2017-05-05 12:31 ` [PATCH v3 3/3] arm/arm64: signal SIBGUS and inject SEA Error gengdongjiu
@ 2017-05-08 17:28 ` James Morse
  2017-05-08 17:54   ` Christoffer Dall
  2017-05-10  8:44   ` gengdongjiu
  1 sibling, 2 replies; 31+ messages in thread
From: James Morse @ 2017-05-08 17:28 UTC (permalink / raw)
  To: linux-arm-kernel

Hi gengdongjiu,

On 04/05/17 17:52, gengdongjiu wrote:
> 2017-05-04 23:42 GMT+08:00 gengdongjiu <gengdj.1984@gmail.com>:
>> On 30/04/17 06:37, Dongjiu Geng wrote:
>>> diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
>>> index 105b6ab..a96594f 100644
>>> --- a/arch/arm/kvm/mmu.c
>>> +++ b/arch/arm/kvm/mmu.c
>>> +static void kvm_handle_bad_page(unsigned long address,
>>> + bool hugetlb, bool hwpoison)
>>> +{
>>> + /* handle both hwpoison and other synchronous external Abort */
>>> + if (hwpoison)
>>> + kvm_send_signal(address, hugetlb, true);
>>> + else
>>> + kvm_send_signal(address, hugetlb, false);
>>> +}
>>
>> Why the extra level of indirection? We only want to signal userspace like this
>> from KVM for hwpoison. Signals for RAS related reasons should come from the bits
>> of the kernel that decoded the error.
> 
> For the SEA, the are maily two types:
> 0b010000 Synchronous External Abort on memory access.
> 0b0101xx Synchronous External Abort on page table walk. DFSC[1:0]
> encode the level.

(KVM shouldn't have to make decisions about this)

> hwpoison should belong to the  "Synchronous External Abort on memory access"
> if the SEA type is not hwpoison, such as page table walk, do you mean
> KVM do not deliver the SIGBUS?

The flow of events should be SEI/SEA from firmware to the hosts's APEI code. KVM
should only be involved to get us back to the host if we were running a guest.
The APEI/hwpoison code may cause a set of processes to be sent signals. The code
in mm/memory-failure.c does this by walking the process rmaps using the physical
addresses in the CPER records.

We want user space to be sent signals as this can (and should) work in exactly
the same way on arm64 as it does on x86 or any other architecture. If a
web-browser can handle SIGBUS notifications for memory-corruption, it shouldn't
have to care what architecture it is running on.

So what is that KVM+SIGBUS patch about?...

>> (hwpoison for KVM is a corner case as Qemu's memory effectively has two users,
>> Qemu and KVM. This isn't the example of how user-space gets signalled.)

KVM creates guests as if they were additional users of Qemu's memory. The code
in mm/memory-failure.c may find that Qemu didn't have the affected page mapped
to user-space - but it may have been in use by stage2.

The KVM+SIGBUS patch hides this difference, meaning Qemu gets a signal when the
guest touches the hwpoison page as if Qemu had touched the page itself.

Signals from KVM is a corner case, for firmware-first decisions should happen in
the APEI code based on CPER records.

> If so, how the KVM handle the SEA type other than hwpoison?

To deliver to a guest? It shouldn't have to know, user space should use a KVM
API to drive this.

When received from hardware? It shouldn't have to care, these things should be
passed into the APEI code for handling. KVM just needs to put the host registers
back.

>>> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
>>> index bb02909..1d2e2e7 100644
>>> --- a/include/uapi/linux/kvm.h
>>> +++ b/include/uapi/linux/kvm.h
>>> @@ -1306,6 +1306,7 @@ struct kvm_s390_ucas_mapping {
>>>  #define KVM_S390_GET_IRQ_STATE  _IOW(KVMIO, 0xb6, struct kvm_s390_irq_state)
>>>  /* Available with KVM_CAP_X86_SMM */
>>>  #define KVM_SMI                   _IO(KVMIO,   0xb7)
>>> +#define KVM_ARM_SEA               _IO(KVMIO,   0xb8)
>>>
>>>  #define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0)
>>>  #define KVM_DEV_ASSIGN_PCI_2_3 (1 << 1)
>>>
>>
>> Why do we need a userspace API for SEA? It can also be done by using
>> KVM_{G,S}ET_ONE_REG to change the vcpu registers. The advantage of doing it this
>> way is you can choose which ESR value to use.
>>
>> Adding a new API call to do something you could do with an old one doesn't look
>> right.
> 
> James, I considered your suggestion before that use the
> KVM_{G,S}ET_ONE_REG to change the vcpu registers. but I found it does
> not have difference to use the alread existed KVM API. 

(Only that is an in-kernel helper, not a published API)

> so may be
> changing the vcpu registers in qemu will duplicate with the KVM APIs.

That is true, but the alternative is a new API that doesn't do anything new, its
just more convenient.

Marc and Christoffer are the people to convince.
I argue the existing API is sufficient.

> injection a SEA is no more than setting some registers: elr_el1, PC,
> PSTATE, SPSR_el1, far_el1, esr_el1
> I seen this KVM API do the same thing as Qemu.  do you found call this
> API will have issue and necessary to choose another ESR value?

Should we let user-space pick the ESR to deliver to the guest? Yes, letting
user-space specify the ESR gives the most flexibility to do something clever in
the future. An obvious choice for SEA is between the external-abort and 'parity
or ECC error' codes. If we tell user-space which of these happened (I don't
think Linux does today) then Qemu can relay that information to the guest.

Thanks,

James

^ permalink raw reply	[flat|nested] 31+ messages in thread

* [PATCH v3 3/3] arm/arm64: signal SIBGUS and inject SEA Error
  2017-05-08 17:28 ` James Morse
@ 2017-05-08 17:54   ` Christoffer Dall
  2017-05-09 14:28     ` James Morse
  2017-05-10  8:44   ` gengdongjiu
  1 sibling, 1 reply; 31+ messages in thread
From: Christoffer Dall @ 2017-05-08 17:54 UTC (permalink / raw)
  To: linux-arm-kernel

On Mon, May 08, 2017 at 06:28:02PM +0100, James Morse wrote:

[...]

> 
> 
> >>> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> >>> index bb02909..1d2e2e7 100644
> >>> --- a/include/uapi/linux/kvm.h
> >>> +++ b/include/uapi/linux/kvm.h
> >>> @@ -1306,6 +1306,7 @@ struct kvm_s390_ucas_mapping {
> >>>  #define KVM_S390_GET_IRQ_STATE  _IOW(KVMIO, 0xb6, struct kvm_s390_irq_state)
> >>>  /* Available with KVM_CAP_X86_SMM */
> >>>  #define KVM_SMI                   _IO(KVMIO,   0xb7)
> >>> +#define KVM_ARM_SEA               _IO(KVMIO,   0xb8)
> >>>
> >>>  #define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0)
> >>>  #define KVM_DEV_ASSIGN_PCI_2_3 (1 << 1)
> >>>
> >>
> >> Why do we need a userspace API for SEA? It can also be done by using
> >> KVM_{G,S}ET_ONE_REG to change the vcpu registers. The advantage of doing it this
> >> way is you can choose which ESR value to use.
> >>
> >> Adding a new API call to do something you could do with an old one doesn't look
> >> right.
> > 
> > James, I considered your suggestion before that use the
> > KVM_{G,S}ET_ONE_REG to change the vcpu registers. but I found it does
> > not have difference to use the alread existed KVM API. 
> 
> (Only that is an in-kernel helper, not a published API)
> 
> 
> > so may be
> > changing the vcpu registers in qemu will duplicate with the KVM APIs.
> 
> That is true, but the alternative is a new API that doesn't do anything new, its
> just more convenient.
> 
> Marc and Christoffer are the people to convince.
> I argue the existing API is sufficient.
> 

I must admit I am losing track of exactly what this proposed API was
supposed to do.

However, if it's a question about setting up VCPU registers to a certain
state and potentially modifying memory, then I think experience has
shown us (psci) that emulating something in the kernel that userspace
can have fine-grained control over is a bad idea, and should be left to
userspace using as generic APIs as possible.

Furthermore, if I understand what injecting a SEA requires, it is very
similar to resetting the CPU and loading data into guest memory, which
QEMU already does today, and there is no reason to introduce additional
APIs if it can be done using KVM_GET/SET_ONE_REG ioctls.

Thanks,
-Christoffer

^ permalink raw reply	[flat|nested] 31+ messages in thread

* [PATCH v3 3/3] arm/arm64: signal SIBGUS and inject SEA Error
  2017-05-08 17:54   ` Christoffer Dall
@ 2017-05-09 14:28     ` James Morse
  2017-05-10  9:15       ` gengdongjiu
  0 siblings, 1 reply; 31+ messages in thread
From: James Morse @ 2017-05-09 14:28 UTC (permalink / raw)
  To: linux-arm-kernel

Hi Christoffer,

On 08/05/17 18:54, Christoffer Dall wrote:
> On Mon, May 08, 2017 at 06:28:02PM +0100, James Morse wrote:
> I must admit I am losing track of exactly what this proposed API was
> supposed to do.

There are two, and we keep jumping between them!
This is about two notification methods APEI has for arm64, 'SEA' and 'SEI'.

SEA is synchronous and looks like a data abort. Qemu/kvmtool can inject these
today using the KVM_GET/SET_ONE_REG API whenever it wants to.

SEI uses SError, is asynchronous and can be masked. In addition these need to be
consumed/synchronised by the ESB instruction, even when executed by a guest.
Hardware has the necessary bits to drive all this, we need to expose an API to
drive it.

(I try to spell them out each time so I don't confuse SEI with something
synchronous!)

This patch was about SEA. I think you've answered our question:

> However, if it's a question about setting up VCPU registers to a certain
> state and potentially modifying memory, then I think experience has
> shown us (psci) that emulating something in the kernel that userspace
> can have fine-grained control over is a bad idea, and should be left to
> userspace using as generic APIs as possible.
> 
> Furthermore, if I understand what injecting a SEA requires, it is very
> similar to resetting the CPU and loading data into guest memory, which
> QEMU already does today, and there is no reason to introduce additional
> APIs if it can be done using KVM_GET/SET_ONE_REG ioctls.

Thanks,

James

^ permalink raw reply	[flat|nested] 31+ messages in thread

* [PATCH v3 3/3] arm/arm64: signal SIBGUS and inject SEA Error
  2017-05-09 14:28     ` James Morse
@ 2017-05-10  9:15       ` gengdongjiu
  2017-05-10 12:20         ` Christoffer Dall
  0 siblings, 1 reply; 31+ messages in thread
From: gengdongjiu @ 2017-05-10  9:15 UTC (permalink / raw)
  To: linux-arm-kernel

Thanks James's explanation.

Hi Christoffer,

On 2017/5/9 22:28, James Morse wrote:
> Hi Christoffer,
> 
> On 08/05/17 18:54, Christoffer Dall wrote:
>> On Mon, May 08, 2017 at 06:28:02PM +0100, James Morse wrote:
>> I must admit I am losing track of exactly what this proposed API was
>> supposed to do.
> 
> There are two, and we keep jumping between them!
> This is about two notification methods APEI has for arm64, 'SEA' and 'SEI'.
> 
> SEA is synchronous and looks like a data abort. Qemu/kvmtool can inject these
> today using the KVM_GET/SET_ONE_REG API whenever it wants to.
> 
> SEI uses SError, is asynchronous and can be masked. In addition these need to be
> consumed/synchronised by the ESB instruction, even when executed by a guest.
> Hardware has the necessary bits to drive all this, we need to expose an API to
> drive it.
> 
> (I try to spell them out each time so I don't confuse SEI with something
> synchronous!)
> 
> 
> This patch was about SEA. I think you've answered our question:

we are talking about the SEA(synchronous data abort) injection two methods:

(1)change vcpu registers in the Qemu/kvmtools and using the KVM_GET/SET_ONE_REG API to set.
(2)using existed in-kernel API "kvm_inject_dabt" to inject through IOCTL command from Qemu.


> 
>> However, if it's a question about setting up VCPU registers to a certain
>> state and potentially modifying memory, then I think experience has
>> shown us (psci) that emulating something in the kernel that userspace
>> can have fine-grained control over is a bad idea, and should be left to
>> userspace using as generic APIs as possible.
>>
>> Furthermore, if I understand what injecting a SEA requires, it is very
>> similar to resetting the CPU and loading data into guest memory, which
>> QEMU already does today, and there is no reason to introduce additional
>> APIs if it can be done using KVM_GET/SET_ONE_REG ioctls.
> 
> 
> Thanks,
> 
> James
> 
> .
> 

^ permalink raw reply	[flat|nested] 31+ messages in thread

* [PATCH v3 3/3] arm/arm64: signal SIBGUS and inject SEA Error
  2017-05-10  9:15       ` gengdongjiu
@ 2017-05-10 12:20         ` Christoffer Dall
  2017-05-10 12:37           ` gengdongjiu
  0 siblings, 1 reply; 31+ messages in thread
From: Christoffer Dall @ 2017-05-10 12:20 UTC (permalink / raw)
  To: linux-arm-kernel

On Wed, May 10, 2017 at 05:15:04PM +0800, gengdongjiu wrote:
> Thanks James's explanation.
> 
> Hi Christoffer,
> 
> On 2017/5/9 22:28, James Morse wrote:
> > Hi Christoffer,
> > 
> > On 08/05/17 18:54, Christoffer Dall wrote:
> >> On Mon, May 08, 2017 at 06:28:02PM +0100, James Morse wrote:
> >> I must admit I am losing track of exactly what this proposed API was
> >> supposed to do.
> > 
> > There are two, and we keep jumping between them!
> > This is about two notification methods APEI has for arm64, 'SEA' and 'SEI'.
> > 
> > SEA is synchronous and looks like a data abort. Qemu/kvmtool can inject these
> > today using the KVM_GET/SET_ONE_REG API whenever it wants to.
> > 
> > SEI uses SError, is asynchronous and can be masked. In addition these need to be
> > consumed/synchronised by the ESB instruction, even when executed by a guest.
> > Hardware has the necessary bits to drive all this, we need to expose an API to
> > drive it.
> > 
> > (I try to spell them out each time so I don't confuse SEI with something
> > synchronous!)
> > 
> > 
> > This patch was about SEA. I think you've answered our question:
> 
> we are talking about the SEA(synchronous data abort) injection two methods:
> 
> (1)change vcpu registers in the Qemu/kvmtools and using the KVM_GET/SET_ONE_REG API to set.

Yes, if this is possible, why would you want something more?

> (2)using existed in-kernel API "kvm_inject_dabt" to inject through IOCTL command from Qemu.
> 

I'm not really going to consider this, because "use internal API from
userspace" doesn't work.

So this should be:

  (2) Introduce a new API to do X.

I still think you know what my preference is; use the existing API if at
all possible.

Thanks,
-Christoffer

^ permalink raw reply	[flat|nested] 31+ messages in thread

* [PATCH v3 3/3] arm/arm64: signal SIBGUS and inject SEA Error
  2017-05-10 12:20         ` Christoffer Dall
@ 2017-05-10 12:37           ` gengdongjiu
  0 siblings, 0 replies; 31+ messages in thread
From: gengdongjiu @ 2017-05-10 12:37 UTC (permalink / raw)
  To: linux-arm-kernel

Hi Christoffer,

On 2017/5/10 20:20, Christoffer Dall wrote:
> On Wed, May 10, 2017 at 05:15:04PM +0800, gengdongjiu wrote:
>> Thanks James's explanation.
>>
>> Hi Christoffer,
>>
>> On 2017/5/9 22:28, James Morse wrote:
>>> Hi Christoffer,
>>>
>>> On 08/05/17 18:54, Christoffer Dall wrote:
>>>> On Mon, May 08, 2017 at 06:28:02PM +0100, James Morse wrote:
>>>> I must admit I am losing track of exactly what this proposed API was
>>>> supposed to do.
>>>
>>> There are two, and we keep jumping between them!
>>> This is about two notification methods APEI has for arm64, 'SEA' and 'SEI'.
>>>
>>> SEA is synchronous and looks like a data abort. Qemu/kvmtool can inject these
>>> today using the KVM_GET/SET_ONE_REG API whenever it wants to.
>>>
>>> SEI uses SError, is asynchronous and can be masked. In addition these need to be
>>> consumed/synchronised by the ESB instruction, even when executed by a guest.
>>> Hardware has the necessary bits to drive all this, we need to expose an API to
>>> drive it.
>>>
>>> (I try to spell them out each time so I don't confuse SEI with something
>>> synchronous!)
>>>
>>>
>>> This patch was about SEA. I think you've answered our question:
>>
>> we are talking about the SEA(synchronous data abort) injection two methods:
>>
>> (1)change vcpu registers in the Qemu/kvmtools and using the KVM_GET/SET_ONE_REG API to set.
> 
> Yes, if this is possible, why would you want something more?
  we will use this method.

> 
>> (2)using existed in-kernel API "kvm_inject_dabt" to inject through IOCTL command from Qemu.
>>
> 
> I'm not really going to consider this, because "use internal API from
> userspace" doesn't work.
> 
> So this should be:
> 
>   (2) Introduce a new API to do X.
  you can ignore the second method, now we will not use it.

> 
> I still think you know what my preference is; use the existing API if at
> all possible.
> 
> Thanks,
> -Christoffer
> 
> .
> 

^ permalink raw reply	[flat|nested] 31+ messages in thread

* [PATCH v3 3/3] arm/arm64: signal SIBGUS and inject SEA Error
  2017-05-08 17:28 ` James Morse
  2017-05-08 17:54   ` Christoffer Dall
@ 2017-05-10  8:44   ` gengdongjiu
  2017-05-12 17:25     ` James Morse
  1 sibling, 1 reply; 31+ messages in thread
From: gengdongjiu @ 2017-05-10  8:44 UTC (permalink / raw)
  To: linux-arm-kernel

Hi James,
  thanks a lot for your answer.

On 2017/5/9 1:28, James Morse wrote:
> Hi gengdongjiu,
> 
> On 04/05/17 17:52, gengdongjiu wrote:
>> 2017-05-04 23:42 GMT+08:00 gengdongjiu <gengdj.1984@gmail.com>:
>>> On 30/04/17 06:37, Dongjiu Geng wrote:
>>>> diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
>>>> index 105b6ab..a96594f 100644
>>>> --- a/arch/arm/kvm/mmu.c
>>>> +++ b/arch/arm/kvm/mmu.c
>>>> +static void kvm_handle_bad_page(unsigned long address,
>>>> + bool hugetlb, bool hwpoison)
>>>> +{
>>>> + /* handle both hwpoison and other synchronous external Abort */
>>>> + if (hwpoison)
>>>> + kvm_send_signal(address, hugetlb, true);
>>>> + else
>>>> + kvm_send_signal(address, hugetlb, false);
>>>> +}
>>>
>>> Why the extra level of indirection? We only want to signal userspace like this
>>> from KVM for hwpoison. Signals for RAS related reasons should come from the bits
>>> of the kernel that decoded the error.
>>
>> For the SEA, the are maily two types:
>> 0b010000 Synchronous External Abort on memory access.
>> 0b0101xx Synchronous External Abort on page table walk. DFSC[1:0]
>> encode the level.
> 
> (KVM shouldn't have to make decisions about this)
> 
> 
>> hwpoison should belong to the  "Synchronous External Abort on memory access"
>> if the SEA type is not hwpoison, such as page table walk, do you mean
>> KVM do not deliver the SIGBUS?
> 
> 
> The flow of events should be SEI/SEA from firmware to the hosts's APEI code. KVM
> should only be involved to get us back to the host if we were running a guest.
> The APEI/hwpoison code may cause a set of processes to be sent signals. The code
> in mm/memory-failure.c does this by walking the process rmaps using the physical
> addresses in the CPER records.
> 
> We want user space to be sent signals as this can (and should) work in exactly
> the same way on arm64 as it does on x86 or any other architecture. If a
> web-browser can handle SIGBUS notifications for memory-corruption, it shouldn't
> have to care what architecture it is running on.
 Ok, James, understand.

> 
> So what is that KVM+SIGBUS patch about?...
> 
>>> (hwpoison for KVM is a corner case as Qemu's memory effectively has two users,
>>> Qemu and KVM. This isn't the example of how user-space gets signalled.)
> 
> KVM creates guests as if they were additional users of Qemu's memory. The code
> in mm/memory-failure.c may find that Qemu didn't have the affected page mapped
> to user-space - but it may have been in use by stage2.
> 
> The KVM+SIGBUS patch hides this difference, meaning Qemu gets a signal when the
> guest touches the hwpoison page as if Qemu had touched the page itself.
> 
> Signals from KVM is a corner case, for firmware-first decisions should happen in
> the APEI code based on CPER records.
> 
> 
>> If so, how the KVM handle the SEA type other than hwpoison?
> 
> To deliver to a guest? It shouldn't have to know, user space should use a KVM
> API to drive this.
> 
> When received from hardware? It shouldn't have to care, these things should be
> passed into the APEI code for handling. KVM just needs to put the host registers
> back.
Recently I confirmed with the hardware team. they said almost all the SEA errors have the
Poison flag, so may be there is no need to consider other SEA errors other than  hwPoison.
only consider SEA hwpoison errors can be enough.

> 
> 
>>>> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
>>>> index bb02909..1d2e2e7 100644
>>>> --- a/include/uapi/linux/kvm.h
>>>> +++ b/include/uapi/linux/kvm.h
>>>> @@ -1306,6 +1306,7 @@ struct kvm_s390_ucas_mapping {
>>>>  #define KVM_S390_GET_IRQ_STATE  _IOW(KVMIO, 0xb6, struct kvm_s390_irq_state)
>>>>  /* Available with KVM_CAP_X86_SMM */
>>>>  #define KVM_SMI                   _IO(KVMIO,   0xb7)
>>>> +#define KVM_ARM_SEA               _IO(KVMIO,   0xb8)
>>>>
>>>>  #define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0)
>>>>  #define KVM_DEV_ASSIGN_PCI_2_3 (1 << 1)
>>>>
>>>
>>> Why do we need a userspace API for SEA? It can also be done by using
>>> KVM_{G,S}ET_ONE_REG to change the vcpu registers. The advantage of doing it this
>>> way is you can choose which ESR value to use.
>>>
>>> Adding a new API call to do something you could do with an old one doesn't look
>>> right.
>>
>> James, I considered your suggestion before that use the
>> KVM_{G,S}ET_ONE_REG to change the vcpu registers. but I found it does
>> not have difference to use the alread existed KVM API. 
> 
> (Only that is an in-kernel helper, not a published API)

yes, the kvm_inject_dabt is an in-kernel API.

> 
> 
>> so may be
>> changing the vcpu registers in qemu will duplicate with the KVM APIs.
> 
> That is true, but the alternative is a new API that doesn't do anything new, its
> just more convenient.
> 
> Marc and Christoffer are the people to convince.
> I argue the existing API is sufficient.
> 
> 
>> injection a SEA is no more than setting some registers: elr_el1, PC,
>> PSTATE, SPSR_el1, far_el1, esr_el1
>> I seen this KVM API do the same thing as Qemu.  do you found call this
>> API will have issue and necessary to choose another ESR value?
> 
> Should we let user-space pick the ESR to deliver to the guest? Yes, letting
> user-space specify the ESR gives the most flexibility to do something clever in
> the future. An obvious choice for SEA is between the external-abort and 'parity
> or ECC error' codes. If we tell user-space which of these happened (I don't
> think Linux does today) then Qemu can relay that information to the guest.

may be the ESR is delivered by the KVM.
(1) guest OS EL0 happen SEA due to hwpoison
(2) CPU traps to EL3 firmware, and update the ESR_EL3
(3) the EL3 firmware copies the ESR_EL3 to ESR_EL2
(4) then jump to EL2 hypervisor, hypervisor uses the ESR_EL2 to inject the SEA.

May be the esr_el2 can provide the accurate error information.
or do you think user-space specify the ESR instead of esr_el2 is better?


> 
> 
> 
> Thanks,
> 
> James
> 
> .
> 

^ permalink raw reply	[flat|nested] 31+ messages in thread

* [PATCH v3 3/3] arm/arm64: signal SIBGUS and inject SEA Error
  2017-05-10  8:44   ` gengdongjiu
@ 2017-05-12 17:25     ` James Morse
  2017-05-21  9:23       ` gengdongjiu
  0 siblings, 1 reply; 31+ messages in thread
From: James Morse @ 2017-05-12 17:25 UTC (permalink / raw)
  To: linux-arm-kernel

Hi gengdongjiu,

On 10/05/17 09:44, gengdongjiu wrote:
> On 2017/5/9 1:28, James Morse wrote:
>>>> (hwpoison for KVM is a corner case as Qemu's memory effectively has two users,
>>>> Qemu and KVM. This isn't the example of how user-space gets signalled.)
>>
>> KVM creates guests as if they were additional users of Qemu's memory. The code
>> in mm/memory-failure.c may find that Qemu didn't have the affected page mapped
>> to user-space - but it may have been in use by stage2.
>>
>> The KVM+SIGBUS patch hides this difference, meaning Qemu gets a signal when the
>> guest touches the hwpoison page as if Qemu had touched the page itself.
>>
>> Signals from KVM is a corner case, for firmware-first decisions should happen in
>> the APEI code based on CPER records.

>>> If so, how the KVM handle the SEA type other than hwpoison?

>> To deliver to a guest? It shouldn't have to know, user space should use a KVM
>> API to drive this.
>>
>> When received from hardware? It shouldn't have to care, these things should be
>> passed into the APEI code for handling. KVM just needs to put the host registers
>> back.

> Recently I confirmed with the hardware team. they said almost all the SEA errors have the
> Poison flag, so may be there is no need to consider other SEA errors other than  hwPoison.
> only consider SEA hwpoison errors can be enough.

We should be careful here, by hwpoison I meant the Linux feature.
>From Documentation/vm/hwpoison.txt:
> Upcoming Intel CPUs have support for recovering from some memory errors
> (``MCA recovery''). This requires the OS to declare a page "poisoned",
> kill the processes associated with it and avoid using it in the future.

We were talking about KVM's reaction to 'the OS declaring a page poisoned'.
Lets try to call this one memory-failure, as that is its Kconfig name. (now I
understand why we've been confusing each other!)

Your hwpoison looks like something the CPU reports in the ERR<n>STATUS registers
(4.6.10 of DDI0587). This is something firmware should read, then describe to
the OS via CPER records. Depending on these CPER records linux may invoke its
memory-failure code.


>>> injection a SEA is no more than setting some registers: elr_el1, PC,
>>> PSTATE, SPSR_el1, far_el1, esr_el1
>>> I seen this KVM API do the same thing as Qemu.  do you found call this
>>> API will have issue and necessary to choose another ESR value?
>>
>> Should we let user-space pick the ESR to deliver to the guest? Yes, letting
>> user-space specify the ESR gives the most flexibility to do something clever in
>> the future. An obvious choice for SEA is between the external-abort and 'parity
>> or ECC error' codes. If we tell user-space which of these happened (I don't
>> think Linux does today) then Qemu can relay that information to the guest.

> may be the ESR is delivered by the KVM.
> (1) guest OS EL0 happen SEA due to hwpoison
> (2) CPU traps to EL3 firmware, and update the ESR_EL3
> (3) the EL3 firmware copies the ESR_EL3 to ESR_EL2
> (4) then jump to EL2 hypervisor, hypervisor uses the ESR_EL2 to inject the SEA.
> 
> May be the esr_el2 can provide the accurate error information.
> or do you think user-space specify the ESR instead of esr_el2 is better?

I think the severity needs to be considered as the notification is handled by
each exception level. There are cases where it will need to be upgraded from
'contained' to 'uncontained'. (more discussion on another part of the thread).


Thanks,

James

^ permalink raw reply	[flat|nested] 31+ messages in thread

* [PATCH v3 3/3] arm/arm64: signal SIBGUS and inject SEA Error
  2017-05-12 17:25     ` James Morse
@ 2017-05-21  9:23       ` gengdongjiu
  0 siblings, 0 replies; 31+ messages in thread
From: gengdongjiu @ 2017-05-21  9:23 UTC (permalink / raw)
  To: linux-arm-kernel

2017-05-13 1:25 GMT+08:00, James Morse <james.morse@arm.com>:
> Hi gengdongjiu,
>
> On 10/05/17 09:44, gengdongjiu wrote:
>> On 2017/5/9 1:28, James Morse wrote:
>>>>> (hwpoison for KVM is a corner case as Qemu's memory effectively has two
>>>>> users,
>>>>> Qemu and KVM. This isn't the example of how user-space gets
>>>>> signalled.)
>>>
>>> KVM creates guests as if they were additional users of Qemu's memory. The
>>> code
>>> in mm/memory-failure.c may find that Qemu didn't have the affected page
>>> mapped
>>> to user-space - but it may have been in use by stage2.
>>>
>>> The KVM+SIGBUS patch hides this difference, meaning Qemu gets a signal
>>> when the
>>> guest touches the hwpoison page as if Qemu had touched the page itself.
>>>
>>> Signals from KVM is a corner case, for firmware-first decisions should
>>> happen in
>>> the APEI code based on CPER records.
>
>>>> If so, how the KVM handle the SEA type other than hwpoison?
>
>>> To deliver to a guest? It shouldn't have to know, user space should use a
>>> KVM
>>> API to drive this.
>>>
>>> When received from hardware? It shouldn't have to care, these things
>>> should be
>>> passed into the APEI code for handling. KVM just needs to put the host
>>> registers
>>> back.
>
>> Recently I confirmed with the hardware team. they said almost all the SEA
>> errors have the
>> Poison flag, so may be there is no need to consider other SEA errors other
>> than  hwPoison.
>> only consider SEA hwpoison errors can be enough.
>
> We should be careful here, by hwpoison I meant the Linux feature.
> From Documentation/vm/hwpoison.txt:
>> Upcoming Intel CPUs have support for recovering from some memory errors
>> (``MCA recovery''). This requires the OS to declare a page "poisoned",
>> kill the processes associated with it and avoid using it in the future.
>
> We were talking about KVM's reaction to 'the OS declaring a page poisoned'.
> Lets try to call this one memory-failure, as that is its Kconfig name. (now
> I
> understand why we've been confusing each other!)
>
> Your hwpoison looks like something the CPU reports in the ERR<n>STATUS
> registers
> (4.6.10 of DDI0587). This is something firmware should read, then describe
> to
> the OS via CPER records. Depending on these CPER records linux may invoke
> its
> memory-failure code.
  yes

>
>
>>>> injection a SEA is no more than setting some registers: elr_el1, PC,
>>>> PSTATE, SPSR_el1, far_el1, esr_el1
>>>> I seen this KVM API do the same thing as Qemu.  do you found call this
>>>> API will have issue and necessary to choose another ESR value?
>>>
>>> Should we let user-space pick the ESR to deliver to the guest? Yes,
>>> letting
>>> user-space specify the ESR gives the most flexibility to do something
>>> clever in
>>> the future. An obvious choice for SEA is between the external-abort and
>>> 'parity
>>> or ECC error' codes. If we tell user-space which of these happened (I
>>> don't
>>> think Linux does today) then Qemu can relay that information to the
>>> guest.
>
>> may be the ESR is delivered by the KVM.
>> (1) guest OS EL0 happen SEA due to hwpoison
>> (2) CPU traps to EL3 firmware, and update the ESR_EL3
>> (3) the EL3 firmware copies the ESR_EL3 to ESR_EL2
>> (4) then jump to EL2 hypervisor, hypervisor uses the ESR_EL2 to inject the
>> SEA.
>>
>> May be the esr_el2 can provide the accurate error information.
>> or do you think user-space specify the ESR instead of esr_el2 is better?
>
> I think the severity needs to be considered as the notification is handled
> by
> each exception level. There are cases where it will need to be upgraded
> from
> 'contained' to 'uncontained'. (more discussion on another part of the
> thread).
  understand it.

>
>
> Thanks,
>
> James
>

^ permalink raw reply	[flat|nested] 31+ messages in thread

end of thread, other threads:[~2017-06-26  5:22 UTC | newest]

Thread overview: 31+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-04-30  5:37 [PATCH v3 1/3] arm64: kvm: support kvmtool to detect RAS extension feature Dongjiu Geng
2017-04-30  5:37 ` [PATCH v3 2/3] arm64: kvm: inject SError with virtual syndrome Dongjiu Geng
2017-05-02  8:03   ` Christoffer Dall
2017-05-02 12:20     ` gengdongjiu
2017-05-02 15:37   ` James Morse
2017-05-05 13:19     ` gengdongjiu
2017-05-12 17:24       ` James Morse
2017-05-21  9:08         ` gengdongjiu
2017-04-30  5:37 ` [PATCH v3 3/3] arm/arm64: signal SIBGUS and inject SEA Error Dongjiu Geng
2017-05-02 15:41   ` James Morse
2017-05-02  7:56 ` [PATCH v3 1/3] arm64: kvm: support kvmtool to detect RAS extension feature Christoffer Dall
2017-05-02 11:05   ` gengdongjiu
2017-05-02 12:15   ` gengdongjiu
2017-05-02 15:48   ` Paolo Bonzini
2017-05-04  8:19     ` James Morse
2017-05-02 15:29 ` James Morse
2017-05-04 15:49   ` James Morse
2017-05-05 12:44     ` gengdongjiu
2017-06-26  5:22   ` gengdongjiu
     [not found] <CAMj-D2BGTDKpOcMu2ip41_MTTj8VDwvs59Ds7yvLHcD8PeQzhg@mail.gmail.com>
2017-05-05 12:31 ` [PATCH v3 3/3] arm/arm64: signal SIBGUS and inject SEA Error gengdongjiu
2017-05-12 17:24   ` James Morse
2017-05-21  8:24     ` gengdongjiu
2017-05-08 17:28 ` James Morse
2017-05-08 17:54   ` Christoffer Dall
2017-05-09 14:28     ` James Morse
2017-05-10  9:15       ` gengdongjiu
2017-05-10 12:20         ` Christoffer Dall
2017-05-10 12:37           ` gengdongjiu
2017-05-10  8:44   ` gengdongjiu
2017-05-12 17:25     ` James Morse
2017-05-21  9:23       ` gengdongjiu

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).