* [PATCH v1 1/3] KVM: arm64: Add arch specific exit reasons
From: Jing Zhang @ 2021-09-22  1:08 UTC (permalink / raw)
  To: KVM, KVMARM, Paolo Bonzini, Marc Zyngier, Will Deacon,
	David Matlack, Peter Shier, Oliver Upton, Sean Christopherson
  Cc: Jing Zhang

Arch-specific exit reasons are already available on other architectures.
Add arch-specific exit reason support for arm64; it will be used by the
KVM stats framework to monitor vCPU status.
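
For illustration, here is a minimal standalone sketch (userspace C with
mock types, not part of this patch) of the recording idiom introduced
below: each thin wrapper tags the vCPU with its exit reason before
delegating to the shared handler, so a later consumer can attribute the
exit:

  #include <stdio.h>

  enum arm_exit_reason { ARM_EXIT_UNKNOWN, ARM_EXIT_HVC64 };

  struct vcpu { enum arm_exit_reason exit_reason; };

  static int handle_hvc(struct vcpu *vcpu)	/* shared handler */
  {
  	return 1;				/* resume the guest */
  }

  static int handle_hvc64(struct vcpu *vcpu)	/* thin wrapper */
  {
  	vcpu->exit_reason = ARM_EXIT_HVC64;	/* tag first... */
  	return handle_hvc(vcpu);		/* ...then delegate */
  }

  int main(void)
  {
  	struct vcpu v = { ARM_EXIT_UNKNOWN };

  	handle_hvc64(&v);
  	printf("last exit reason: %d\n", v.exit_reason);
  	return 0;
  }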

Signed-off-by: Jing Zhang <jingzhangos@google.com>
---
 arch/arm64/include/asm/kvm_emulate.h |  5 +++
 arch/arm64/include/asm/kvm_host.h    | 33 +++++++++++++++
 arch/arm64/kvm/handle_exit.c         | 62 +++++++++++++++++++++++++---
 arch/arm64/kvm/mmu.c                 |  4 ++
 arch/arm64/kvm/sys_regs.c            |  6 +++
 5 files changed, 105 insertions(+), 5 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index fd418955e31e..eb5ec3a479d3 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -319,6 +319,11 @@ static inline bool kvm_vcpu_trap_is_iabt(const struct kvm_vcpu *vcpu)
 	return kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_IABT_LOW;
 }
 
+static inline bool kvm_vcpu_trap_is_dabt(const struct kvm_vcpu *vcpu)
+{
+	return kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_DABT_LOW;
+}
+
 static inline bool kvm_vcpu_trap_is_exec_fault(const struct kvm_vcpu *vcpu)
 {
 	return kvm_vcpu_trap_is_iabt(vcpu) && !kvm_vcpu_abt_iss1tw(vcpu);
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index f8be56d5342b..0f0cea26ce32 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -278,6 +278,36 @@ struct vcpu_reset_state {
 	bool		reset;
 };
 
+enum arm_exit_reason {
+	ARM_EXIT_UNKNOWN,
+	ARM_EXIT_IRQ,
+	ARM_EXIT_EL1_SERROR,
+	ARM_EXIT_HYP_GONE,
+	ARM_EXIT_IL,
+	ARM_EXIT_WFI,
+	ARM_EXIT_WFE,
+	ARM_EXIT_CP15_32,
+	ARM_EXIT_CP15_64,
+	ARM_EXIT_CP14_32,
+	ARM_EXIT_CP14_LS,
+	ARM_EXIT_CP14_64,
+	ARM_EXIT_HVC32,
+	ARM_EXIT_SMC32,
+	ARM_EXIT_HVC64,
+	ARM_EXIT_SMC64,
+	ARM_EXIT_SYS64,
+	ARM_EXIT_SVE,
+	ARM_EXIT_IABT_LOW,
+	ARM_EXIT_DABT_LOW,
+	ARM_EXIT_SOFTSTP_LOW,
+	ARM_EXIT_WATCHPT_LOW,
+	ARM_EXIT_BREAKPT_LOW,
+	ARM_EXIT_BKPT32,
+	ARM_EXIT_BRK64,
+	ARM_EXIT_FP_ASIMD,
+	ARM_EXIT_PAC,
+};
+
 struct kvm_vcpu_arch {
 	struct kvm_cpu_context ctxt;
 	void *sve_state;
@@ -384,6 +414,9 @@ struct kvm_vcpu_arch {
 		u64 last_steal;
 		gpa_t base;
 	} steal;
+
+	/* Arch specific exit reason */
+	enum arm_exit_reason exit_reason;
 };
 
 /* Pointer to the vcpu's SVE FFR for sve_{save,load}_state() */
diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
index 275a27368a04..90a47758b23d 100644
--- a/arch/arm64/kvm/handle_exit.c
+++ b/arch/arm64/kvm/handle_exit.c
@@ -49,6 +49,18 @@ static int handle_hvc(struct kvm_vcpu *vcpu)
 	return ret;
 }
 
+static int handle_hvc32(struct kvm_vcpu *vcpu)
+{
+	vcpu->arch.exit_reason = ARM_EXIT_HVC32;
+	return handle_hvc(vcpu);
+}
+
+static int handle_hvc64(struct kvm_vcpu *vcpu)
+{
+	vcpu->arch.exit_reason = ARM_EXIT_HVC64;
+	return handle_hvc(vcpu);
+}
+
 static int handle_smc(struct kvm_vcpu *vcpu)
 {
 	/*
@@ -64,12 +76,25 @@ static int handle_smc(struct kvm_vcpu *vcpu)
 	return 1;
 }
 
+static int handle_smc32(struct kvm_vcpu *vcpu)
+{
+	vcpu->arch.exit_reason = ARM_EXIT_SMC32;
+	return handle_smc(vcpu);
+}
+
+static int handle_smc64(struct kvm_vcpu *vcpu)
+{
+	vcpu->arch.exit_reason = ARM_EXIT_SMC64;
+	return handle_smc(vcpu);
+}
+
 /*
  * Guest accesses to FP/ASIMD registers are routed to this handler only
  * when the system doesn't support FP/ASIMD.
  */
 static int handle_no_fpsimd(struct kvm_vcpu *vcpu)
 {
+	vcpu->arch.exit_reason = ARM_EXIT_FP_ASIMD;
 	kvm_inject_undefined(vcpu);
 	return 1;
 }
@@ -91,10 +116,12 @@ static int kvm_handle_wfx(struct kvm_vcpu *vcpu)
 	if (kvm_vcpu_get_esr(vcpu) & ESR_ELx_WFx_ISS_WFE) {
 		trace_kvm_wfx_arm64(*vcpu_pc(vcpu), true);
 		vcpu->stat.wfe_exit_stat++;
+		vcpu->arch.exit_reason = ARM_EXIT_WFE;
 		kvm_vcpu_on_spin(vcpu, vcpu_mode_priv(vcpu));
 	} else {
 		trace_kvm_wfx_arm64(*vcpu_pc(vcpu), false);
 		vcpu->stat.wfi_exit_stat++;
+		vcpu->arch.exit_reason = ARM_EXIT_WFI;
 		kvm_vcpu_block(vcpu);
 		kvm_clear_request(KVM_REQ_UNHALT, vcpu);
 	}
@@ -119,12 +146,29 @@ static int kvm_handle_guest_debug(struct kvm_vcpu *vcpu)
 {
 	struct kvm_run *run = vcpu->run;
 	u32 esr = kvm_vcpu_get_esr(vcpu);
+	u8 esr_ec = ESR_ELx_EC(esr);
 
 	run->exit_reason = KVM_EXIT_DEBUG;
 	run->debug.arch.hsr = esr;
 
-	if (ESR_ELx_EC(esr) == ESR_ELx_EC_WATCHPT_LOW)
+	switch (esr_ec) {
+	case ESR_ELx_EC_SOFTSTP_LOW:
+		vcpu->arch.exit_reason = ARM_EXIT_SOFTSTP_LOW;
+		break;
+	case ESR_ELx_EC_WATCHPT_LOW:
 		run->debug.arch.far = vcpu->arch.fault.far_el2;
+		vcpu->arch.exit_reason = ARM_EXIT_WATCHPT_LOW;
+		break;
+	case ESR_ELx_EC_BREAKPT_LOW:
+		vcpu->arch.exit_reason = ARM_EXIT_BREAKPT_LOW;
+		break;
+	case ESR_ELx_EC_BKPT32:
+		vcpu->arch.exit_reason = ARM_EXIT_BKPT32;
+		break;
+	case ESR_ELx_EC_BRK64:
+		vcpu->arch.exit_reason = ARM_EXIT_BRK64;
+		break;
+	}
 
 	return 0;
 }
@@ -136,12 +180,14 @@ static int kvm_handle_unknown_ec(struct kvm_vcpu *vcpu)
 	kvm_pr_unimpl("Unknown exception class: esr: %#08x -- %s\n",
 		      esr, esr_get_class_string(esr));
 
+	vcpu->arch.exit_reason = ARM_EXIT_UNKNOWN;
 	kvm_inject_undefined(vcpu);
 	return 1;
 }
 
 static int handle_sve(struct kvm_vcpu *vcpu)
 {
+	vcpu->arch.exit_reason = ARM_EXIT_SVE;
 	/* Until SVE is supported for guests: */
 	kvm_inject_undefined(vcpu);
 	return 1;
@@ -154,6 +200,7 @@ static int handle_sve(struct kvm_vcpu *vcpu)
  */
 static int kvm_handle_ptrauth(struct kvm_vcpu *vcpu)
 {
+	vcpu->arch.exit_reason = ARM_EXIT_PAC;
 	kvm_inject_undefined(vcpu);
 	return 1;
 }
@@ -166,10 +213,10 @@ static exit_handle_fn arm_exit_handlers[] = {
 	[ESR_ELx_EC_CP14_MR]	= kvm_handle_cp14_32,
 	[ESR_ELx_EC_CP14_LS]	= kvm_handle_cp14_load_store,
 	[ESR_ELx_EC_CP14_64]	= kvm_handle_cp14_64,
-	[ESR_ELx_EC_HVC32]	= handle_hvc,
-	[ESR_ELx_EC_SMC32]	= handle_smc,
-	[ESR_ELx_EC_HVC64]	= handle_hvc,
-	[ESR_ELx_EC_SMC64]	= handle_smc,
+	[ESR_ELx_EC_HVC32]	= handle_hvc32,
+	[ESR_ELx_EC_SMC32]	= handle_smc32,
+	[ESR_ELx_EC_HVC64]	= handle_hvc64,
+	[ESR_ELx_EC_SMC64]	= handle_smc64,
 	[ESR_ELx_EC_SYS64]	= kvm_handle_sys_reg,
 	[ESR_ELx_EC_SVE]	= handle_sve,
 	[ESR_ELx_EC_IABT_LOW]	= kvm_handle_guest_abort,
@@ -230,8 +277,10 @@ int handle_exit(struct kvm_vcpu *vcpu, int exception_index)
 
 	switch (exception_index) {
 	case ARM_EXCEPTION_IRQ:
+		vcpu->arch.exit_reason = ARM_EXIT_IRQ;
 		return 1;
 	case ARM_EXCEPTION_EL1_SERROR:
+		vcpu->arch.exit_reason = ARM_EXIT_EL1_SERROR;
 		return 1;
 	case ARM_EXCEPTION_TRAP:
 		return handle_trap_exceptions(vcpu);
@@ -240,6 +289,7 @@ int handle_exit(struct kvm_vcpu *vcpu, int exception_index)
 		 * EL2 has been reset to the hyp-stub. This happens when a guest
 		 * is pre-empted by kvm_reboot()'s shutdown call.
 		 */
+		vcpu->arch.exit_reason = ARM_EXIT_HYP_GONE;
 		run->exit_reason = KVM_EXIT_FAIL_ENTRY;
 		return 0;
 	case ARM_EXCEPTION_IL:
@@ -247,11 +297,13 @@ int handle_exit(struct kvm_vcpu *vcpu, int exception_index)
 		 * We attempted an illegal exception return.  Guest state must
 		 * have been corrupted somehow.  Give up.
 		 */
+		vcpu->arch.exit_reason = ARM_EXIT_IL;
 		run->exit_reason = KVM_EXIT_FAIL_ENTRY;
 		return -EINVAL;
 	default:
 		kvm_pr_unimpl("Unsupported exception type: %d",
 			      exception_index);
+		vcpu->arch.exit_reason = ARM_EXIT_UNKNOWN;
 		run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
 		return 0;
 	}
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index 1a94a7ca48f2..a6a18d113c98 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -1197,6 +1197,10 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
 
 	fault_ipa = kvm_vcpu_get_fault_ipa(vcpu);
 	is_iabt = kvm_vcpu_trap_is_iabt(vcpu);
+	if (is_iabt)
+		vcpu->arch.exit_reason = ARM_EXIT_IABT_LOW;
+	else if (kvm_vcpu_trap_is_dabt(vcpu))
+		vcpu->arch.exit_reason = ARM_EXIT_DABT_LOW;
 
 	/* Synchronous External Abort? */
 	if (kvm_vcpu_abt_issea(vcpu)) {
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 1d46e185f31e..0915dfa589c7 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -2158,6 +2158,7 @@ static int check_sysreg_table(const struct sys_reg_desc *table, unsigned int n,
 
 int kvm_handle_cp14_load_store(struct kvm_vcpu *vcpu)
 {
+	vcpu->arch.exit_reason = ARM_EXIT_CP14_LS;
 	kvm_inject_undefined(vcpu);
 	return 1;
 }
@@ -2325,21 +2326,25 @@ static int kvm_handle_cp_32(struct kvm_vcpu *vcpu,
 
 int kvm_handle_cp15_64(struct kvm_vcpu *vcpu)
 {
+	vcpu->arch.exit_reason = ARM_EXIT_CP15_64;
 	return kvm_handle_cp_64(vcpu, cp15_64_regs, ARRAY_SIZE(cp15_64_regs));
 }
 
 int kvm_handle_cp15_32(struct kvm_vcpu *vcpu)
 {
+	vcpu->arch.exit_reason = ARM_EXIT_CP15_32;
 	return kvm_handle_cp_32(vcpu, cp15_regs, ARRAY_SIZE(cp15_regs));
 }
 
 int kvm_handle_cp14_64(struct kvm_vcpu *vcpu)
 {
+	vcpu->arch.exit_reason = ARM_EXIT_CP14_64;
 	return kvm_handle_cp_64(vcpu, cp14_64_regs, ARRAY_SIZE(cp14_64_regs));
 }
 
 int kvm_handle_cp14_32(struct kvm_vcpu *vcpu)
 {
+	vcpu->arch.exit_reason = ARM_EXIT_CP14_32;
 	return kvm_handle_cp_32(vcpu, cp14_regs, ARRAY_SIZE(cp14_regs));
 }
 
@@ -2397,6 +2402,7 @@ int kvm_handle_sys_reg(struct kvm_vcpu *vcpu)
 	int ret;
 
 	trace_kvm_handle_sys_reg(esr);
+	vcpu->arch.exit_reason = ARM_EXIT_SYS64;
 
 	params = esr_sys64_to_params(esr);
 	params.regval = vcpu_get_reg(vcpu, Rt);
-- 
2.33.0.464.g1972c5931b-goog


* [PATCH v1 2/3] KVM: arm64: Add counter stats for arch specific exit reasons
From: Jing Zhang @ 2021-09-22  1:08 UTC (permalink / raw)
  To: KVM, KVMARM, Paolo Bonzini, Marc Zyngier, Will Deacon,
	David Matlack, Peter Shier, Oliver Upton, Sean Christopherson
  Cc: Jing Zhang

The exit reason stats can be used to monitor vCPU status.
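
As a hedged illustration (not part of this patch), userspace could dump
the new exit_* counters through the binary stats interface
(KVM_GET_STATS_FD on a vCPU fd), following the layout described in
Documentation/virt/kvm/api.rst; error handling is elided and vcpu_fd is
assumed to be an open vCPU file descriptor:

  #include <stdio.h>
  #include <stdint.h>
  #include <stdlib.h>
  #include <string.h>
  #include <unistd.h>
  #include <sys/ioctl.h>
  #include <linux/kvm.h>

  static void dump_exit_counters(int vcpu_fd)
  {
  	struct kvm_stats_header hdr;
  	int fd = ioctl(vcpu_fd, KVM_GET_STATS_FD, NULL);

  	pread(fd, &hdr, sizeof(hdr), 0);

  	/* Each descriptor is followed by hdr.name_size bytes of name. */
  	size_t desc_sz = sizeof(struct kvm_stats_desc) + hdr.name_size;
  	char *descs = malloc(desc_sz * hdr.num_desc);

  	pread(fd, descs, desc_sz * hdr.num_desc, hdr.desc_offset);

  	for (uint32_t i = 0; i < hdr.num_desc; i++) {
  		struct kvm_stats_desc *d =
  			(void *)(descs + i * desc_sz);
  		uint64_t val;

  		if (strncmp(d->name, "exit_", 5))
  			continue;
  		/* The counters added here are single u64 values. */
  		pread(fd, &val, sizeof(val), hdr.data_offset + d->offset);
  		printf("%-20s %llu\n", d->name, (unsigned long long)val);
  	}
  	free(descs);
  	close(fd);
  }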

Signed-off-by: Jing Zhang <jingzhangos@google.com>
---
 arch/arm64/include/asm/kvm_host.h | 33 ++++++++++++++++++++++++++++---
 arch/arm64/kvm/guest.c            | 33 +++++++++++++++++++++++++++----
 arch/arm64/kvm/handle_exit.c      | 22 ++++++++++++++++++---
 arch/arm64/kvm/mmu.c              |  7 +++++--
 arch/arm64/kvm/sys_regs.c         |  6 ++++++
 5 files changed, 89 insertions(+), 12 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 0f0cea26ce32..4d65de22add3 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -607,13 +607,40 @@ struct kvm_vm_stat {
 
 struct kvm_vcpu_stat {
 	struct kvm_vcpu_stat_generic generic;
-	u64 hvc_exit_stat;
-	u64 wfe_exit_stat;
-	u64 wfi_exit_stat;
 	u64 mmio_exit_user;
 	u64 mmio_exit_kernel;
 	u64 signal_exits;
 	u64 exits;
+	/* Stats for arch specific exit reasons */
+	struct {
+		u64 exit_unknown;
+		u64 exit_irq;
+		u64 exit_el1_serror;
+		u64 exit_hyp_gone;
+		u64 exit_il;
+		u64 exit_wfi;
+		u64 exit_wfe;
+		u64 exit_cp15_32;
+		u64 exit_cp15_64;
+		u64 exit_cp14_32;
+		u64 exit_cp14_ls;
+		u64 exit_cp14_64;
+		u64 exit_hvc32;
+		u64 exit_smc32;
+		u64 exit_hvc64;
+		u64 exit_smc64;
+		u64 exit_sys64;
+		u64 exit_sve;
+		u64 exit_iabt_low;
+		u64 exit_dabt_low;
+		u64 exit_softstp_low;
+		u64 exit_watchpt_low;
+		u64 exit_breakpt_low;
+		u64 exit_bkpt32;
+		u64 exit_brk64;
+		u64 exit_fp_asimd;
+		u64 exit_pac;
+	};
 };
 
 int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init);
diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
index 5ce26bedf23c..abd9327d7110 100644
--- a/arch/arm64/kvm/guest.c
+++ b/arch/arm64/kvm/guest.c
@@ -43,13 +43,38 @@ const struct kvm_stats_header kvm_vm_stats_header = {
 
 const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
 	KVM_GENERIC_VCPU_STATS(),
-	STATS_DESC_COUNTER(VCPU, hvc_exit_stat),
-	STATS_DESC_COUNTER(VCPU, wfe_exit_stat),
-	STATS_DESC_COUNTER(VCPU, wfi_exit_stat),
 	STATS_DESC_COUNTER(VCPU, mmio_exit_user),
 	STATS_DESC_COUNTER(VCPU, mmio_exit_kernel),
 	STATS_DESC_COUNTER(VCPU, signal_exits),
-	STATS_DESC_COUNTER(VCPU, exits)
+	STATS_DESC_COUNTER(VCPU, exits),
+	/* Stats for arch specific exit reasons */
+	STATS_DESC_COUNTER(VCPU, exit_unknown),
+	STATS_DESC_COUNTER(VCPU, exit_irq),
+	STATS_DESC_COUNTER(VCPU, exit_el1_serror),
+	STATS_DESC_COUNTER(VCPU, exit_hyp_gone),
+	STATS_DESC_COUNTER(VCPU, exit_il),
+	STATS_DESC_COUNTER(VCPU, exit_wfi),
+	STATS_DESC_COUNTER(VCPU, exit_wfe),
+	STATS_DESC_COUNTER(VCPU, exit_cp15_32),
+	STATS_DESC_COUNTER(VCPU, exit_cp15_64),
+	STATS_DESC_COUNTER(VCPU, exit_cp14_32),
+	STATS_DESC_COUNTER(VCPU, exit_cp14_ls),
+	STATS_DESC_COUNTER(VCPU, exit_cp14_64),
+	STATS_DESC_COUNTER(VCPU, exit_hvc32),
+	STATS_DESC_COUNTER(VCPU, exit_smc32),
+	STATS_DESC_COUNTER(VCPU, exit_hvc64),
+	STATS_DESC_COUNTER(VCPU, exit_smc64),
+	STATS_DESC_COUNTER(VCPU, exit_sys64),
+	STATS_DESC_COUNTER(VCPU, exit_sve),
+	STATS_DESC_COUNTER(VCPU, exit_iabt_low),
+	STATS_DESC_COUNTER(VCPU, exit_dabt_low),
+	STATS_DESC_COUNTER(VCPU, exit_softstp_low),
+	STATS_DESC_COUNTER(VCPU, exit_watchpt_low),
+	STATS_DESC_COUNTER(VCPU, exit_breakpt_low),
+	STATS_DESC_COUNTER(VCPU, exit_bkpt32),
+	STATS_DESC_COUNTER(VCPU, exit_brk64),
+	STATS_DESC_COUNTER(VCPU, exit_fp_asimd),
+	STATS_DESC_COUNTER(VCPU, exit_pac),
 };
 
 const struct kvm_stats_header kvm_vcpu_stats_header = {
diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
index 90a47758b23d..e83cd52078b2 100644
--- a/arch/arm64/kvm/handle_exit.c
+++ b/arch/arm64/kvm/handle_exit.c
@@ -38,7 +38,6 @@ static int handle_hvc(struct kvm_vcpu *vcpu)
 
 	trace_kvm_hvc_arm64(*vcpu_pc(vcpu), vcpu_get_reg(vcpu, 0),
 			    kvm_vcpu_hvc_get_imm(vcpu));
-	vcpu->stat.hvc_exit_stat++;
 
 	ret = kvm_hvc_call_handler(vcpu);
 	if (ret < 0) {
@@ -52,12 +51,14 @@ static int handle_hvc(struct kvm_vcpu *vcpu)
 static int handle_hvc32(struct kvm_vcpu *vcpu)
 {
 	vcpu->arch.exit_reason = ARM_EXIT_HVC32;
+	++vcpu->stat.exit_hvc32;
 	return handle_hvc(vcpu);
 }
 
 static int handle_hvc64(struct kvm_vcpu *vcpu)
 {
 	vcpu->arch.exit_reason = ARM_EXIT_HVC64;
+	++vcpu->stat.exit_hvc64;
 	return handle_hvc(vcpu);
 }
 
@@ -79,12 +80,14 @@ static int handle_smc(struct kvm_vcpu *vcpu)
 static int handle_smc32(struct kvm_vcpu *vcpu)
 {
 	vcpu->arch.exit_reason = ARM_EXIT_SMC32;
+	++vcpu->stat.exit_smc32;
 	return handle_smc(vcpu);
 }
 
 static int handle_smc64(struct kvm_vcpu *vcpu)
 {
 	vcpu->arch.exit_reason = ARM_EXIT_SMC64;
+	++vcpu->stat.exit_smc64;
 	return handle_smc(vcpu);
 }
 
@@ -95,6 +98,7 @@ static int handle_smc64(struct kvm_vcpu *vcpu)
 static int handle_no_fpsimd(struct kvm_vcpu *vcpu)
 {
 	vcpu->arch.exit_reason = ARM_EXIT_FP_ASIMD;
+	++vcpu->stat.exit_fp_asimd;
 	kvm_inject_undefined(vcpu);
 	return 1;
 }
@@ -115,13 +119,13 @@ static int kvm_handle_wfx(struct kvm_vcpu *vcpu)
 {
 	if (kvm_vcpu_get_esr(vcpu) & ESR_ELx_WFx_ISS_WFE) {
 		trace_kvm_wfx_arm64(*vcpu_pc(vcpu), true);
-		vcpu->stat.wfe_exit_stat++;
 		vcpu->arch.exit_reason = ARM_EXIT_WFE;
+		++vcpu->stat.exit_wfe;
 		kvm_vcpu_on_spin(vcpu, vcpu_mode_priv(vcpu));
 	} else {
 		trace_kvm_wfx_arm64(*vcpu_pc(vcpu), false);
-		vcpu->stat.wfi_exit_stat++;
 		vcpu->arch.exit_reason = ARM_EXIT_WFI;
+		++vcpu->stat.exit_wfi;
 		kvm_vcpu_block(vcpu);
 		kvm_clear_request(KVM_REQ_UNHALT, vcpu);
 	}
@@ -154,19 +158,24 @@ static int kvm_handle_guest_debug(struct kvm_vcpu *vcpu)
 	switch (esr_ec) {
 	case ESR_ELx_EC_SOFTSTP_LOW:
 		vcpu->arch.exit_reason = ARM_EXIT_SOFTSTP_LOW;
+		++vcpu->stat.exit_softstp_low;
 		break;
 	case ESR_ELx_EC_WATCHPT_LOW:
 		run->debug.arch.far = vcpu->arch.fault.far_el2;
 		vcpu->arch.exit_reason = ARM_EXIT_WATCHPT_LOW;
+		++vcpu->stat.exit_watchpt_low;
 		break;
 	case ESR_ELx_EC_BREAKPT_LOW:
 		vcpu->arch.exit_reason = ARM_EXIT_BREAKPT_LOW;
+		++vcpu->stat.exit_breakpt_low;
 		break;
 	case ESR_ELx_EC_BKPT32:
 		vcpu->arch.exit_reason = ARM_EXIT_BKPT32;
+		++vcpu->stat.exit_bkpt32;
 		break;
 	case ESR_ELx_EC_BRK64:
 		vcpu->arch.exit_reason = ARM_EXIT_BRK64;
+		++vcpu->stat.exit_brk64;
 		break;
 	}
 
@@ -181,6 +190,7 @@ static int kvm_handle_unknown_ec(struct kvm_vcpu *vcpu)
 		      esr, esr_get_class_string(esr));
 
 	vcpu->arch.exit_reason = ARM_EXIT_UNKNOWN;
+	++vcpu->stat.exit_unknown;
 	kvm_inject_undefined(vcpu);
 	return 1;
 }
@@ -188,6 +198,7 @@ static int kvm_handle_unknown_ec(struct kvm_vcpu *vcpu)
 static int handle_sve(struct kvm_vcpu *vcpu)
 {
 	vcpu->arch.exit_reason = ARM_EXIT_SVE;
+	++vcpu->stat.exit_sve;
 	/* Until SVE is supported for guests: */
 	kvm_inject_undefined(vcpu);
 	return 1;
@@ -201,6 +212,7 @@ static int handle_sve(struct kvm_vcpu *vcpu)
 static int kvm_handle_ptrauth(struct kvm_vcpu *vcpu)
 {
 	vcpu->arch.exit_reason = ARM_EXIT_PAC;
+	++vcpu->stat.exit_pac;
 	kvm_inject_undefined(vcpu);
 	return 1;
 }
@@ -278,9 +290,11 @@ int handle_exit(struct kvm_vcpu *vcpu, int exception_index)
 	switch (exception_index) {
 	case ARM_EXCEPTION_IRQ:
 		vcpu->arch.exit_reason = ARM_EXIT_IRQ;
+		++vcpu->stat.exit_irq;
 		return 1;
 	case ARM_EXCEPTION_EL1_SERROR:
 		vcpu->arch.exit_reason = ARM_EXIT_EL1_SERROR;
+		++vcpu->stat.exit_el1_serror;
 		return 1;
 	case ARM_EXCEPTION_TRAP:
 		return handle_trap_exceptions(vcpu);
@@ -291,6 +305,7 @@ int handle_exit(struct kvm_vcpu *vcpu, int exception_index)
 		 */
 		vcpu->arch.exit_reason = ARM_EXIT_HYP_GONE;
 		run->exit_reason = KVM_EXIT_FAIL_ENTRY;
+		++vcpu->stat.exit_hyp_gone;
 		return 0;
 	case ARM_EXCEPTION_IL:
 		/*
@@ -299,6 +314,7 @@ int handle_exit(struct kvm_vcpu *vcpu, int exception_index)
 		 */
 		vcpu->arch.exit_reason = ARM_EXIT_IL;
 		run->exit_reason = KVM_EXIT_FAIL_ENTRY;
+		++vcpu->stat.exit_il;
 		return -EINVAL;
 	default:
 		kvm_pr_unimpl("Unsupported exception type: %d",
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index a6a18d113c98..799c756dd9f5 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -1197,10 +1197,13 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
 
 	fault_ipa = kvm_vcpu_get_fault_ipa(vcpu);
 	is_iabt = kvm_vcpu_trap_is_iabt(vcpu);
-	if (is_iabt)
+	if (is_iabt) {
 		vcpu->arch.exit_reason = ARM_EXIT_IABT_LOW;
-	else if (kvm_vcpu_trap_is_dabt(vcpu))
+		++vcpu->stat.exit_iabt_low;
+	} else if (kvm_vcpu_trap_is_dabt(vcpu)) {
 		vcpu->arch.exit_reason = ARM_EXIT_DABT_LOW;
+		++vcpu->stat.exit_dabt_low;
+	}
 
 	/* Synchronous External Abort? */
 	if (kvm_vcpu_abt_issea(vcpu)) {
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 0915dfa589c7..344a6ff26bf6 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -2159,6 +2159,7 @@ static int check_sysreg_table(const struct sys_reg_desc *table, unsigned int n,
 int kvm_handle_cp14_load_store(struct kvm_vcpu *vcpu)
 {
 	vcpu->arch.exit_reason = ARM_EXIT_CP14_LS;
+	++vcpu->stat.exit_cp14_ls;
 	kvm_inject_undefined(vcpu);
 	return 1;
 }
@@ -2327,24 +2328,28 @@ static int kvm_handle_cp_32(struct kvm_vcpu *vcpu,
 int kvm_handle_cp15_64(struct kvm_vcpu *vcpu)
 {
 	vcpu->arch.exit_reason = ARM_EXIT_CP15_64;
+	++vcpu->stat.exit_cp15_64;
 	return kvm_handle_cp_64(vcpu, cp15_64_regs, ARRAY_SIZE(cp15_64_regs));
 }
 
 int kvm_handle_cp15_32(struct kvm_vcpu *vcpu)
 {
 	vcpu->arch.exit_reason = ARM_EXIT_CP15_32;
+	++vcpu->stat.exit_cp15_32;
 	return kvm_handle_cp_32(vcpu, cp15_regs, ARRAY_SIZE(cp15_regs));
 }
 
 int kvm_handle_cp14_64(struct kvm_vcpu *vcpu)
 {
 	vcpu->arch.exit_reason = ARM_EXIT_CP14_64;
+	++vcpu->stat.exit_cp14_64;
 	return kvm_handle_cp_64(vcpu, cp14_64_regs, ARRAY_SIZE(cp14_64_regs));
 }
 
 int kvm_handle_cp14_32(struct kvm_vcpu *vcpu)
 {
 	vcpu->arch.exit_reason = ARM_EXIT_CP14_32;
+	++vcpu->stat.exit_cp14_32;
 	return kvm_handle_cp_32(vcpu, cp14_regs, ARRAY_SIZE(cp14_regs));
 }
 
@@ -2403,6 +2408,7 @@ int kvm_handle_sys_reg(struct kvm_vcpu *vcpu)
 
 	trace_kvm_handle_sys_reg(esr);
 	vcpu->arch.exit_reason = ARM_EXIT_SYS64;
+	++vcpu->stat.exit_sys64;
 
 	params = esr_sys64_to_params(esr);
 	params.regval = vcpu_get_reg(vcpu, Rt);
-- 
2.33.0.464.g1972c5931b-goog


* [PATCH v1 3/3] KVM: arm64: Add histogram stats for handling time of arch specific exit reasons
From: Jing Zhang @ 2021-09-22  1:08 UTC (permalink / raw)
  To: KVM, KVMARM, Paolo Bonzini, Marc Zyngier, Will Deacon,
	David Matlack, Peter Shier, Oliver Upton, Sean Christopherson
  Cc: Jing Zhang

These logarithmic histogram stats are useful for monitoring the handling
latency of the different vCPU exit reasons.
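
As a hedged sketch (assuming the generic KVM_STATS_LOG_HIST_UPDATE()
places a sample in bucket fls64(value), clamped to the last slot), a
delta of N nanoseconds lands in bucket floor(log2(N)) + 1, i.e. bucket
i covers [2^(i-1), 2^i) ns and bucket 0 holds only zero deltas:

  #include <stdio.h>
  #include <stdint.h>

  #define ARM_EXIT_HIST_CNT 64

  static size_t log_hist_bucket(uint64_t ns)
  {
  	size_t index = 0;

  	while (ns) {			/* open-coded fls64() */
  		index++;
  		ns >>= 1;
  	}
  	return index < ARM_EXIT_HIST_CNT ? index : ARM_EXIT_HIST_CNT - 1;
  }

  int main(void)
  {
  	/* A 1500 ns WFI exit lands in bucket 11: 1024 <= 1500 < 2048. */
  	printf("bucket(1500) = %zu\n", log_hist_bucket(1500));
  	return 0;
  }

With 64 buckets, the top slot absorbs everything from 2^62 ns upward, so
the histograms cover the full range of plausible handling times.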

Signed-off-by: Jing Zhang <jingzhangos@google.com>
---
 arch/arm64/include/asm/kvm_host.h | 36 ++++++++++++
 arch/arm64/kvm/arm.c              |  4 ++
 arch/arm64/kvm/guest.c            | 43 ++++++++++++++
 arch/arm64/kvm/handle_exit.c      | 95 +++++++++++++++++++++++++++++++
 4 files changed, 178 insertions(+)

diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 4d65de22add3..f1a29ca3d4f3 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -417,6 +417,9 @@ struct kvm_vcpu_arch {
 
 	/* Arch specific exit reason */
 	enum arm_exit_reason exit_reason;
+
+	/* The timestamp for the last VCPU exit */
+	u64 last_exit_time;
 };
 
 /* Pointer to the vcpu's SVE FFR for sve_{save,load}_state() */
@@ -605,6 +608,8 @@ struct kvm_vm_stat {
 	struct kvm_vm_stat_generic generic;
 };
 
+#define ARM_EXIT_HIST_CNT	64
+
 struct kvm_vcpu_stat {
 	struct kvm_vcpu_stat_generic generic;
 	u64 mmio_exit_user;
@@ -641,6 +646,36 @@ struct kvm_vcpu_stat {
 		u64 exit_fp_asimd;
 		u64 exit_pac;
 	};
+	/* Histogram stats for handling time of arch specific exit reasons */
+	struct {
+		u64 exit_unknown_hist[ARM_EXIT_HIST_CNT];
+		u64 exit_irq_hist[ARM_EXIT_HIST_CNT];
+		u64 exit_el1_serror_hist[ARM_EXIT_HIST_CNT];
+		u64 exit_hyp_gone_hist[ARM_EXIT_HIST_CNT];
+		u64 exit_il_hist[ARM_EXIT_HIST_CNT];
+		u64 exit_wfi_hist[ARM_EXIT_HIST_CNT];
+		u64 exit_wfe_hist[ARM_EXIT_HIST_CNT];
+		u64 exit_cp15_32_hist[ARM_EXIT_HIST_CNT];
+		u64 exit_cp15_64_hist[ARM_EXIT_HIST_CNT];
+		u64 exit_cp14_32_hist[ARM_EXIT_HIST_CNT];
+		u64 exit_cp14_ls_hist[ARM_EXIT_HIST_CNT];
+		u64 exit_cp14_64_hist[ARM_EXIT_HIST_CNT];
+		u64 exit_hvc32_hist[ARM_EXIT_HIST_CNT];
+		u64 exit_smc32_hist[ARM_EXIT_HIST_CNT];
+		u64 exit_hvc64_hist[ARM_EXIT_HIST_CNT];
+		u64 exit_smc64_hist[ARM_EXIT_HIST_CNT];
+		u64 exit_sys64_hist[ARM_EXIT_HIST_CNT];
+		u64 exit_sve_hist[ARM_EXIT_HIST_CNT];
+		u64 exit_iabt_low_hist[ARM_EXIT_HIST_CNT];
+		u64 exit_dabt_low_hist[ARM_EXIT_HIST_CNT];
+		u64 exit_softstp_low_hist[ARM_EXIT_HIST_CNT];
+		u64 exit_watchpt_low_hist[ARM_EXIT_HIST_CNT];
+		u64 exit_breakpt_low_hist[ARM_EXIT_HIST_CNT];
+		u64 exit_bkpt32_hist[ARM_EXIT_HIST_CNT];
+		u64 exit_brk64_hist[ARM_EXIT_HIST_CNT];
+		u64 exit_fp_asimd_hist[ARM_EXIT_HIST_CNT];
+		u64 exit_pac_hist[ARM_EXIT_HIST_CNT];
+	};
 };
 
 int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init);
@@ -715,6 +750,7 @@ void force_vm_exit(const cpumask_t *mask);
 
 int handle_exit(struct kvm_vcpu *vcpu, int exception_index);
 void handle_exit_early(struct kvm_vcpu *vcpu, int exception_index);
+void update_hist_exit_stats(struct kvm_vcpu *vcpu);
 
 int kvm_handle_cp14_load_store(struct kvm_vcpu *vcpu);
 int kvm_handle_cp14_32(struct kvm_vcpu *vcpu);
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index fe102cd2e518..156f80b699d3 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -795,6 +795,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
 	ret = 1;
 	run->exit_reason = KVM_EXIT_UNKNOWN;
 	while (ret > 0) {
+		/* Update histogram stats for exit reasons */
+		update_hist_exit_stats(vcpu);
+
 		/*
 		 * Check conditions before entering the guest
 		 */
@@ -903,6 +906,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
 		 */
 		guest_exit();
 		trace_kvm_exit(ret, kvm_vcpu_trap_get_class(vcpu), *vcpu_pc(vcpu));
+		vcpu->arch.last_exit_time = ktime_to_ns(ktime_get());
 
 		/* Exit types that need handling before we can be preempted */
 		handle_exit_early(vcpu, ret);
diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
index abd9327d7110..bbf51578fdec 100644
--- a/arch/arm64/kvm/guest.c
+++ b/arch/arm64/kvm/guest.c
@@ -75,6 +75,49 @@ const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
 	STATS_DESC_COUNTER(VCPU, exit_brk64),
 	STATS_DESC_COUNTER(VCPU, exit_fp_asimd),
 	STATS_DESC_COUNTER(VCPU, exit_pac),
+	/* Histogram stats for handling time of arch specific exit reasons */
+	STATS_DESC_LOGHIST_TIME_NSEC(
+			VCPU, exit_unknown_hist, ARM_EXIT_HIST_CNT),
+	STATS_DESC_LOGHIST_TIME_NSEC(
+			VCPU, exit_irq_hist, ARM_EXIT_HIST_CNT),
+	STATS_DESC_LOGHIST_TIME_NSEC(
+			VCPU, exit_el1_serror_hist, ARM_EXIT_HIST_CNT),
+	STATS_DESC_LOGHIST_TIME_NSEC(
+			VCPU, exit_hyp_gone_hist, ARM_EXIT_HIST_CNT),
+	STATS_DESC_LOGHIST_TIME_NSEC(VCPU, exit_il_hist, ARM_EXIT_HIST_CNT),
+	STATS_DESC_LOGHIST_TIME_NSEC(VCPU, exit_wfi_hist, ARM_EXIT_HIST_CNT),
+	STATS_DESC_LOGHIST_TIME_NSEC(VCPU, exit_wfe_hist, ARM_EXIT_HIST_CNT),
+	STATS_DESC_LOGHIST_TIME_NSEC(
+			VCPU, exit_cp15_32_hist, ARM_EXIT_HIST_CNT),
+	STATS_DESC_LOGHIST_TIME_NSEC(
+			VCPU, exit_cp15_64_hist, ARM_EXIT_HIST_CNT),
+	STATS_DESC_LOGHIST_TIME_NSEC(
+			VCPU, exit_cp14_32_hist, ARM_EXIT_HIST_CNT),
+	STATS_DESC_LOGHIST_TIME_NSEC(
+			VCPU, exit_cp14_ls_hist, ARM_EXIT_HIST_CNT),
+	STATS_DESC_LOGHIST_TIME_NSEC(
+			VCPU, exit_cp14_64_hist, ARM_EXIT_HIST_CNT),
+	STATS_DESC_LOGHIST_TIME_NSEC(VCPU, exit_hvc32_hist, ARM_EXIT_HIST_CNT),
+	STATS_DESC_LOGHIST_TIME_NSEC(VCPU, exit_smc32_hist, ARM_EXIT_HIST_CNT),
+	STATS_DESC_LOGHIST_TIME_NSEC(VCPU, exit_hvc64_hist, ARM_EXIT_HIST_CNT),
+	STATS_DESC_LOGHIST_TIME_NSEC(VCPU, exit_smc64_hist, ARM_EXIT_HIST_CNT),
+	STATS_DESC_LOGHIST_TIME_NSEC(VCPU, exit_sys64_hist, ARM_EXIT_HIST_CNT),
+	STATS_DESC_LOGHIST_TIME_NSEC(VCPU, exit_sve_hist, ARM_EXIT_HIST_CNT),
+	STATS_DESC_LOGHIST_TIME_NSEC(
+			VCPU, exit_iabt_low_hist, ARM_EXIT_HIST_CNT),
+	STATS_DESC_LOGHIST_TIME_NSEC(
+			VCPU, exit_dabt_low_hist, ARM_EXIT_HIST_CNT),
+	STATS_DESC_LOGHIST_TIME_NSEC(
+			VCPU, exit_softstp_low_hist, ARM_EXIT_HIST_CNT),
+	STATS_DESC_LOGHIST_TIME_NSEC(
+			VCPU, exit_watchpt_low_hist, ARM_EXIT_HIST_CNT),
+	STATS_DESC_LOGHIST_TIME_NSEC(
+			VCPU, exit_breakpt_low_hist, ARM_EXIT_HIST_CNT),
+	STATS_DESC_LOGHIST_TIME_NSEC(VCPU, exit_bkpt32_hist, ARM_EXIT_HIST_CNT),
+	STATS_DESC_LOGHIST_TIME_NSEC(VCPU, exit_brk64_hist, ARM_EXIT_HIST_CNT),
+	STATS_DESC_LOGHIST_TIME_NSEC(
+			VCPU, exit_fp_asimd_hist, ARM_EXIT_HIST_CNT),
+	STATS_DESC_LOGHIST_TIME_NSEC(VCPU, exit_pac_hist, ARM_EXIT_HIST_CNT),
 };
 
 const struct kvm_stats_header kvm_vcpu_stats_header = {
diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
index e83cd52078b2..5e642a6275c1 100644
--- a/arch/arm64/kvm/handle_exit.c
+++ b/arch/arm64/kvm/handle_exit.c
@@ -395,3 +395,98 @@ void __noreturn __cold nvhe_hyp_panic_handler(u64 esr, u64 spsr,
 	panic("HYP panic:\nPS:%08llx PC:%016llx ESR:%08llx\nFAR:%016llx HPFAR:%016llx PAR:%016llx\nVCPU:%016lx\n",
 	      spsr, elr_virt, esr, far, hpfar, par, vcpu);
 }
+
+void update_hist_exit_stats(struct kvm_vcpu *vcpu)
+{
+	u64 val = ktime_to_ns(ktime_get()) - vcpu->arch.last_exit_time;
+
+	if (unlikely(!vcpu->arch.last_exit_time))
+		return;
+
+	switch (vcpu->arch.exit_reason) {
+	case ARM_EXIT_UNKNOWN:
+		KVM_STATS_LOG_HIST_UPDATE(vcpu->stat.exit_unknown_hist, val);
+		break;
+	case ARM_EXIT_IRQ:
+		KVM_STATS_LOG_HIST_UPDATE(vcpu->stat.exit_irq_hist, val);
+		break;
+	case ARM_EXIT_EL1_SERROR:
+		KVM_STATS_LOG_HIST_UPDATE(vcpu->stat.exit_el1_serror_hist, val);
+		break;
+	case ARM_EXIT_HYP_GONE:
+		KVM_STATS_LOG_HIST_UPDATE(vcpu->stat.exit_hyp_gone_hist, val);
+		break;
+	case ARM_EXIT_IL:
+		KVM_STATS_LOG_HIST_UPDATE(vcpu->stat.exit_il_hist, val);
+		break;
+	case ARM_EXIT_WFI:
+		KVM_STATS_LOG_HIST_UPDATE(vcpu->stat.exit_wfi_hist, val);
+		break;
+	case ARM_EXIT_WFE:
+		KVM_STATS_LOG_HIST_UPDATE(vcpu->stat.exit_wfe_hist, val);
+		break;
+	case ARM_EXIT_CP15_32:
+		KVM_STATS_LOG_HIST_UPDATE(vcpu->stat.exit_cp15_32_hist, val);
+		break;
+	case ARM_EXIT_CP15_64:
+		KVM_STATS_LOG_HIST_UPDATE(vcpu->stat.exit_cp15_64_hist, val);
+		break;
+	case ARM_EXIT_CP14_32:
+		KVM_STATS_LOG_HIST_UPDATE(vcpu->stat.exit_cp14_32_hist, val);
+		break;
+	case ARM_EXIT_CP14_LS:
+		KVM_STATS_LOG_HIST_UPDATE(vcpu->stat.exit_cp14_ls_hist, val);
+		break;
+	case ARM_EXIT_CP14_64:
+		KVM_STATS_LOG_HIST_UPDATE(vcpu->stat.exit_cp14_64_hist, val);
+		break;
+	case ARM_EXIT_HVC32:
+		KVM_STATS_LOG_HIST_UPDATE(vcpu->stat.exit_hvc32_hist, val);
+		break;
+	case ARM_EXIT_SMC32:
+		KVM_STATS_LOG_HIST_UPDATE(vcpu->stat.exit_smc32_hist, val);
+		break;
+	case ARM_EXIT_HVC64:
+		KVM_STATS_LOG_HIST_UPDATE(vcpu->stat.exit_hvc64_hist, val);
+		break;
+	case ARM_EXIT_SMC64:
+		KVM_STATS_LOG_HIST_UPDATE(vcpu->stat.exit_smc64_hist, val);
+		break;
+	case ARM_EXIT_SYS64:
+		KVM_STATS_LOG_HIST_UPDATE(vcpu->stat.exit_sys64_hist, val);
+		break;
+	case ARM_EXIT_SVE:
+		KVM_STATS_LOG_HIST_UPDATE(vcpu->stat.exit_sve_hist, val);
+		break;
+	case ARM_EXIT_IABT_LOW:
+		KVM_STATS_LOG_HIST_UPDATE(vcpu->stat.exit_iabt_low_hist, val);
+		break;
+	case ARM_EXIT_DABT_LOW:
+		KVM_STATS_LOG_HIST_UPDATE(vcpu->stat.exit_dabt_low_hist, val);
+		break;
+	case ARM_EXIT_SOFTSTP_LOW:
+		KVM_STATS_LOG_HIST_UPDATE(
+				vcpu->stat.exit_softstp_low_hist, val);
+		break;
+	case ARM_EXIT_WATCHPT_LOW:
+		KVM_STATS_LOG_HIST_UPDATE(
+				vcpu->stat.exit_watchpt_low_hist, val);
+		break;
+	case ARM_EXIT_BREAKPT_LOW:
+		KVM_STATS_LOG_HIST_UPDATE(
+				vcpu->stat.exit_breakpt_low_hist, val);
+		break;
+	case ARM_EXIT_BKPT32:
+		KVM_STATS_LOG_HIST_UPDATE(vcpu->stat.exit_bkpt32_hist, val);
+		break;
+	case ARM_EXIT_BRK64:
+		KVM_STATS_LOG_HIST_UPDATE(vcpu->stat.exit_brk64_hist, val);
+		break;
+	case ARM_EXIT_FP_ASIMD:
+		KVM_STATS_LOG_HIST_UPDATE(vcpu->stat.exit_fp_asimd_hist, val);
+		break;
+	case ARM_EXIT_PAC:
+		KVM_STATS_LOG_HIST_UPDATE(vcpu->stat.exit_pac_hist, val);
+		break;
+	}
+}
-- 
2.33.0.464.g1972c5931b-goog


+				vcpu->stat.exit_watchpt_low_hist, val);
+		break;
+	case ARM_EXIT_BREAKPT_LOW:
+		KVM_STATS_LOG_HIST_UPDATE(
+				vcpu->stat.exit_breakpt_low_hist, val);
+		break;
+	case ARM_EXIT_BKPT32:
+		KVM_STATS_LOG_HIST_UPDATE(vcpu->stat.exit_bkpt32_hist, val);
+		break;
+	case ARM_EXIT_BRK64:
+		KVM_STATS_LOG_HIST_UPDATE(vcpu->stat.exit_brk64_hist, val);
+		break;
+	case ARM_EXIT_FP_ASIMD:
+		KVM_STATS_LOG_HIST_UPDATE(vcpu->stat.exit_fp_asimd_hist, val);
+		break;
+	case ARM_EXIT_PAC:
+		KVM_STATS_LOG_HIST_UPDATE(vcpu->stat.exit_pac_hist, val);
+		break;
+	}
+}
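
An illustrative aside, not part of the patch: if a hypothetical
ARM_EXIT_NR_REASONS terminator were added to enum arm_exit_reason, and
the per-reason arrays were collapsed into a single two-dimensional
array, the whole switch above would reduce to an index:

/*
 * Sketch only: assumes struct kvm_vcpu_stat gains
 *	u64 exit_hist[ARM_EXIT_NR_REASONS][ARM_EXIT_HIST_CNT];
 * in place of the individual *_hist arrays (the stats descriptors in
 * guest.c would still need one named entry per reason).
 */
void update_hist_exit_stats(struct kvm_vcpu *vcpu)
{
	u64 val;

	/* Nothing to account until the vCPU has exited at least once. */
	if (unlikely(!vcpu->arch.last_exit_time))
		return;

	val = ktime_to_ns(ktime_get()) - vcpu->arch.last_exit_time;
	if (vcpu->arch.exit_reason < ARM_EXIT_NR_REASONS)
		KVM_STATS_LOG_HIST_UPDATE(
			vcpu->stat.exit_hist[vcpu->arch.exit_reason], val);
}

This only shrinks the code; it would not change the memory footprint.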
-- 
2.33.0.464.g1972c5931b-goog

* Re: [PATCH v1 3/3] KVM: arm64: Add histogram stats for handling time of arch specific exit reasons
  2021-09-22  1:08   ` Jing Zhang
@ 2021-09-22 11:22     ` Marc Zyngier
  -1 siblings, 0 replies; 28+ messages in thread
From: Marc Zyngier @ 2021-09-22 11:22 UTC (permalink / raw)
  To: Jing Zhang
  Cc: KVM, KVMARM, Paolo Bonzini, Will Deacon, David Matlack,
	Peter Shier, Oliver Upton, Sean Christopherson

Hi Jing,

On Wed, 22 Sep 2021 02:08:51 +0100,
Jing Zhang <jingzhangos@google.com> wrote:
> 
> These logarithmic histogram stats are useful for monitoring performance
> of handling for different kinds of VCPU exit reasons.
> 
> Signed-off-by: Jing Zhang <jingzhangos@google.com>
> ---
>  arch/arm64/include/asm/kvm_host.h | 36 ++++++++++++
>  arch/arm64/kvm/arm.c              |  4 ++
>  arch/arm64/kvm/guest.c            | 43 ++++++++++++++
>  arch/arm64/kvm/handle_exit.c      | 95 +++++++++++++++++++++++++++++++
>  4 files changed, 178 insertions(+)
> 
> diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
> index 4d65de22add3..f1a29ca3d4f3 100644
> --- a/arch/arm64/include/asm/kvm_host.h
> +++ b/arch/arm64/include/asm/kvm_host.h
> @@ -417,6 +417,9 @@ struct kvm_vcpu_arch {
>  
>  	/* Arch specific exit reason */
>  	enum arm_exit_reason exit_reason;
> +
> +	/* The timestamp for the last VCPU exit */
> +	u64 last_exit_time;
>  };
>  
>  /* Pointer to the vcpu's SVE FFR for sve_{save,load}_state() */
> @@ -605,6 +608,8 @@ struct kvm_vm_stat {
>  	struct kvm_vm_stat_generic generic;
>  };
>  
> +#define ARM_EXIT_HIST_CNT	64
> +
>  struct kvm_vcpu_stat {
>  	struct kvm_vcpu_stat_generic generic;
>  	u64 mmio_exit_user;
> @@ -641,6 +646,36 @@ struct kvm_vcpu_stat {
>  		u64 exit_fp_asimd;
>  		u64 exit_pac;
>  	};
> +	/* Histogram stats for handling time of arch specific exit reasons */
> +	struct {
> +		u64 exit_unknown_hist[ARM_EXIT_HIST_CNT];
> +		u64 exit_irq_hist[ARM_EXIT_HIST_CNT];
> +		u64 exit_el1_serror_hist[ARM_EXIT_HIST_CNT];
> +		u64 exit_hyp_gone_hist[ARM_EXIT_HIST_CNT];
> +		u64 exit_il_hist[ARM_EXIT_HIST_CNT];
> +		u64 exit_wfi_hist[ARM_EXIT_HIST_CNT];
> +		u64 exit_wfe_hist[ARM_EXIT_HIST_CNT];
> +		u64 exit_cp15_32_hist[ARM_EXIT_HIST_CNT];
> +		u64 exit_cp15_64_hist[ARM_EXIT_HIST_CNT];
> +		u64 exit_cp14_32_hist[ARM_EXIT_HIST_CNT];
> +		u64 exit_cp14_ls_hist[ARM_EXIT_HIST_CNT];
> +		u64 exit_cp14_64_hist[ARM_EXIT_HIST_CNT];
> +		u64 exit_hvc32_hist[ARM_EXIT_HIST_CNT];
> +		u64 exit_smc32_hist[ARM_EXIT_HIST_CNT];
> +		u64 exit_hvc64_hist[ARM_EXIT_HIST_CNT];
> +		u64 exit_smc64_hist[ARM_EXIT_HIST_CNT];
> +		u64 exit_sys64_hist[ARM_EXIT_HIST_CNT];
> +		u64 exit_sve_hist[ARM_EXIT_HIST_CNT];
> +		u64 exit_iabt_low_hist[ARM_EXIT_HIST_CNT];
> +		u64 exit_dabt_low_hist[ARM_EXIT_HIST_CNT];
> +		u64 exit_softstp_low_hist[ARM_EXIT_HIST_CNT];
> +		u64 exit_watchpt_low_hist[ARM_EXIT_HIST_CNT];
> +		u64 exit_breakpt_low_hist[ARM_EXIT_HIST_CNT];
> +		u64 exit_bkpt32_hist[ARM_EXIT_HIST_CNT];
> +		u64 exit_brk64_hist[ARM_EXIT_HIST_CNT];
> +		u64 exit_fp_asimd_hist[ARM_EXIT_HIST_CNT];
> +		u64 exit_pac_hist[ARM_EXIT_HIST_CNT];
> +	};

I'm sorry, but I don't think this is right, and I really wish you had
reached out before putting any effort in this.

This appears to be ~13kB per vcpu worth of data (27 histograms of 64
u64 buckets each, i.e. 13824 bytes) that means *nothing*. Counting
exception syndrome occurrences doesn't say anything about how important
the associated exception is. A few examples:

- exit_hyp_gone_hist: KVM is gone. We've removed the hypervisor. What
  is the point of this? How many values do you expect?

- exit_dabt_low_hist: page faults. Crucial data, isn't it? Permission,
  Access, or Translation fault? At which level? Caused by userspace or
  kernel? Resulted in a page being allocated or not? Read from swap?
  Successful or not?

- exit_pac_hist, exit_fp_asimd_hist: actually handled early, and only
  end up in that part of the hypervisor when there is nothing to do
  (either there is no corresponding HW or the feature is disabled).

- exit_sys64_hist: System register traps. Which sysreg? With what effect?

Blindly exposing these numbers doesn't result in anything that can be
used for any sort of useful analysis and bloats the already huge data
structures. It also affects every single user, most of which do not
care about this data. It also doesn't scale. Are you going to keep
adding these counters and histograms every time the architecture (or
KVM itself) grows some new event?

Frankly, this is a job for BPF and the tracing subsystem, not for some
hardcoded syndrome accounting. It would allow us to extract meaningful
information, prevent bloat, and crucially make it optional. Even empty
trace points like the ones used in the scheduler would be infinitely
better than this (load your own module that hooks into these trace
points, expose the data you want, any way you want).
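
To make that concrete, here is a minimal sketch of such a module. It is
untested, assumes the current prototypes of the arm64 kvm_entry and
kvm_exit tracepoints, and ignores vcpu migration between exit and
re-entry; the point is only that the histogram policy can live entirely
out of tree:

#include <linux/kernel.h>
#include <linux/ktime.h>
#include <linux/log2.h>
#include <linux/module.h>
#include <linux/percpu.h>
#include <linux/string.h>
#include <linux/tracepoint.h>

#define EC_MAX		64	/* ESR_ELx EC space */
#define NR_BUCKETS	64

static DEFINE_PER_CPU(u64, exit_ts);	/* timestamp taken at exit */
static DEFINE_PER_CPU(u32, exit_ec);	/* EC of the pending exit */
static u64 hist[EC_MAX][NR_BUCKETS];	/* log2(ns) handling time per EC */

static struct tracepoint *tp_exit, *tp_entry;

/* kvm_exit: TP_PROTO(int ret, unsigned int esr_ec, unsigned long vcpu_pc) */
static void probe_exit(void *data, int ret, unsigned int esr_ec,
		       unsigned long vcpu_pc)
{
	__this_cpu_write(exit_ec, esr_ec & (EC_MAX - 1));
	__this_cpu_write(exit_ts, ktime_get_ns());
}

/* kvm_entry: TP_PROTO(unsigned long vcpu_pc) */
static void probe_entry(void *data, unsigned long vcpu_pc)
{
	u64 ts = __this_cpu_read(exit_ts);
	u64 delta;

	if (!ts)
		return;

	delta = ktime_get_ns() - ts;
	hist[__this_cpu_read(exit_ec)]
	    [min(delta ? ilog2(delta) : 0, NR_BUCKETS - 1)]++;
}

static void find_tp(struct tracepoint *tp, void *priv)
{
	if (!strcmp(tp->name, "kvm_exit"))
		tp_exit = tp;
	else if (!strcmp(tp->name, "kvm_entry"))
		tp_entry = tp;
}

static int __init exit_hist_init(void)
{
	for_each_kernel_tracepoint(find_tp, NULL);
	if (!tp_exit || !tp_entry)
		return -ENODEV;

	tracepoint_probe_register(tp_exit, probe_exit, NULL);
	tracepoint_probe_register(tp_entry, probe_entry, NULL);
	return 0;
}

static void __exit exit_hist_exit(void)
{
	tracepoint_probe_unregister(tp_exit, probe_exit, NULL);
	tracepoint_probe_unregister(tp_entry, probe_entry, NULL);
	tracepoint_synchronize_unregister();
}

module_init(exit_hist_init);
module_exit(exit_hist_exit);
MODULE_LICENSE("GPL");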

As it stands, there is no way I'd be considering merging something
that looks like this series.

Thanks,

	M.

-- 
Without deviation from the norm, progress is not possible.

* Re: [PATCH v1 3/3] KVM: arm64: Add histogram stats for handling time of arch specific exit reasons
  2021-09-22 11:22     ` Marc Zyngier
@ 2021-09-22 15:37       ` Paolo Bonzini
  -1 siblings, 0 replies; 28+ messages in thread
From: Paolo Bonzini @ 2021-09-22 15:37 UTC (permalink / raw)
  To: Marc Zyngier, Jing Zhang
  Cc: KVM, KVMARM, Will Deacon, David Matlack, Peter Shier,
	Oliver Upton, Sean Christopherson

On 22/09/21 13:22, Marc Zyngier wrote:
> Frankly, this is a job for BPF and the tracing subsystem, not for some
> hardcoded syndrome accounting. It would allow us to extract meaningful
> information, prevent bloat, and crucially make it optional. Even empty
> trace points like the ones used in the scheduler would be infinitely
> better than this (load your own module that hooks into these trace
> points, expose the data you want, any way you want).

I agree.  I had set aside for later the similar series you had for x86, 
but I felt the same as Marc; even just counting the number of 
occurrences of each exit reason is a nontrivial amount of memory to 
spend on each vCPU.

Paolo


* Re: [PATCH v1 3/3] KVM: arm64: Add histogram stats for handling time of arch specific exit reasons
  2021-09-22 15:37       ` Paolo Bonzini
@ 2021-09-22 16:09         ` Jing Zhang
  -1 siblings, 0 replies; 28+ messages in thread
From: Jing Zhang @ 2021-09-22 16:09 UTC (permalink / raw)
  To: Paolo Bonzini
  Cc: Marc Zyngier, KVM, KVMARM, Will Deacon, David Matlack,
	Peter Shier, Oliver Upton, Sean Christopherson

Hi Marc, Paolo,

On Wed, Sep 22, 2021 at 8:37 AM Paolo Bonzini <pbonzini@redhat.com> wrote:
>
> On 22/09/21 13:22, Marc Zyngier wrote:
> > Frankly, this is a job for BPF and the tracing subsystem, not for some
> > hardcoded syndrome accounting. It would allow us to extract meaningful
> > information, prevent bloat, and crucially make it optional. Even empty
> > trace points like the ones used in the scheduler would be infinitely
> > better than this (load your own module that hooks into these trace
> > points, expose the data you want, any way you want).
>
> I agree.  I had left out for later the similar series you had for x86,
> but I felt the same as Marc; even just counting the number of
> occurrences of each exit reason is a nontrivial amount of memory to
> spend on each vCPU.
>
> Paolo
>
Thanks for the feedback about this. It is actually kind of what I
expected. The right approach is to add the h/w exit reasons that have
been verified as useful for debugging and monitoring, instead of
exposing all of them blindly.
We will have an internal discussion about why those were needed in the
first place and see whether that reason still exists.

Jing

* Re: [PATCH v1 3/3] KVM: arm64: Add histogram stats for handling time of arch specific exit reasons
  2021-09-22 15:37       ` Paolo Bonzini
@ 2021-09-22 18:13         ` Sean Christopherson
  -1 siblings, 0 replies; 28+ messages in thread
From: Sean Christopherson @ 2021-09-22 18:13 UTC (permalink / raw)
  To: Paolo Bonzini
  Cc: Marc Zyngier, Jing Zhang, KVM, KVMARM, Will Deacon,
	David Matlack, Peter Shier, Oliver Upton, Jim Mattson,
	Ben Gardon, Aaron Lewis, Venkatesh Srinivas

+Google folks

On Wed, Sep 22, 2021, Paolo Bonzini wrote:
> On 22/09/21 13:22, Marc Zyngier wrote:
> > Frankly, this is a job for BPF and the tracing subsystem, not for some
> > hardcoded syndrome accounting. It would allow to extract meaningful
> > information, prevent bloat, and crucially make it optional. Even empty
> > trace points like the ones used in the scheduler would be infinitely
> > better than this (load your own module that hooks into these trace
> > points, expose the data you want, any way you want).
> 
> I agree.  I had left out for later the similar series you had for x86, but I
> felt the same as Marc; even just counting the number of occurrences of each
> exit reason is a nontrivial amount of memory to spend on each vCPU.

That depends on the use case, environment, etc...  E.g. if the VM is assigned a
_minimum_ of 4gb per vCPU, then burning even tens of kilobytes of memory per vCPU
is trivial, or at least completely acceptable.

I do 100% agree this should be optional, be it through an ioctl(), module/kernel
param, Kconfig, whatever.  The changelogs are also sorely lacking the motivation
for having dedicated stats; we'll do our best to remedy that for future work.

Stepping back a bit, this is one piece of the larger issue of how to modernize
KVM for hyperscale usage.  BPF and tracing are great when the debugger has root
access to the machine and can rerun the failing workload at will.  They're useless
for identifying trends across large numbers of machines, triaging failures after
the fact, debugging performance issues with workloads that the debugger doesn't
have direct access to, etc...

Logging is a similar story, e.g. using _ratelimited() printk to aid debug works
well when there are a very limited number of VMs and there is a human that can
react to arbitrary kernel messages, but it's basically useless when there are 10s
or 100s of VMs and taking action on a kernel message requires a prior knowledge
of the message.

I'm certainly not expecting other people to solve our challenges, and I fully
appreciate that there are many KVM users that don't care at all about scalability,
but I'm hoping we can get the community at large, and especially maintainers and
reviewers, to also consider at-scale use cases when designing, implementing,
reviewing, etc...

* Re: [PATCH v1 3/3] KVM: arm64: Add histogram stats for handling time of arch specific exit reasons
  2021-09-22 18:13         ` Sean Christopherson
@ 2021-09-22 18:53           ` Marc Zyngier
  -1 siblings, 0 replies; 28+ messages in thread
From: Marc Zyngier @ 2021-09-22 18:53 UTC (permalink / raw)
  To: Sean Christopherson
  Cc: Paolo Bonzini, Jing Zhang, KVM, KVMARM, Will Deacon,
	David Matlack, Peter Shier, Oliver Upton, Jim Mattson,
	Ben Gardon, Aaron Lewis, Venkatesh Srinivas

On Wed, 22 Sep 2021 19:13:40 +0100,
Sean Christopherson <seanjc@google.com> wrote:

> Stepping back a bit, this is one piece of the larger issue of how to
> modernize KVM for hyperscale usage.  BPF and tracing are great when
> the debugger has root access to the machine and can rerun the
> failing workload at will.  They're useless for identifying trends
> across large numbers of machines, triaging failures after the fact,
> debugging performance issues with workloads that the debugger
> doesn't have direct access to, etc...

Which is why I suggested the use of trace points as kernel module
hooks to perform whatever accounting you require. This would give you
the same level of detail this series exposes.

And I'm all for adding these hooks where it matters as long as they
are not considered ABI and don't appear in /sys/kernel/debug/tracing (in
general, no userspace visibility).

The scheduler is an interesting example of this, as it exposes all sorts
of hooks for people to look under the hood. No user of the hook? No
overhead, no additional memory used. I may have heard that Android
makes heavy use of this.

Because I'm pretty sure that whatever stat we expose, every cloud
vendor will want their own variant, so we may just as well put the
matter in their own hands.

I also wouldn't discount BPF as a possibility. You could perfectly
well have permanent BPF programs running from the moment you boot the
system, and use that to generate your histograms. This isn't
necessarily a one-off, debug-only solution.

> Logging is a similar story, e.g. using _ratelimited() printk to aid
> debug works well when there are a very limited number of VMs and
> there is a human that can react to arbitrary kernel messages, but
> it's basically useless when there are 10s or 100s of VMs and taking
> action on a kernel message requires a prior knowledge of the
> message.

I'm not sure logging is remotely the same. For a start, the kernel
should not log anything unless something has oopsed (and yes, I still
have some bits to clean on the arm64 side). I'm not even sure what you
would want to log. I'd like to understand the need here, because I
feel like I'm missing something.

> I'm certainly not expecting other people to solve our challenges,
> and I fully appreciate that there are many KVM users that don't care
> at all about scalability, but I'm hoping we can get the community at
> large, and especially maintainers and reviewers, to also consider
> at-scale use cases when designing, implementing, reviewing, etc...

My take is that scalability has to go with flexibility. Anything that
gets hardcoded will quickly become a burden: I definitely regret
adding the current KVM trace points, as they don't show what I need,
and I can't change them as they are ABI.

Thanks,

	M.

-- 
Without deviation from the norm, progress is not possible.

* Re: [PATCH v1 3/3] KVM: arm64: Add histogram stats for handling time of arch specific exit reasons
  2021-09-22 18:53           ` Marc Zyngier
@ 2021-09-22 23:22             ` David Matlack
  -1 siblings, 0 replies; 28+ messages in thread
From: David Matlack @ 2021-09-22 23:22 UTC (permalink / raw)
  To: Marc Zyngier
  Cc: Sean Christopherson, Paolo Bonzini, Jing Zhang, KVM, KVMARM,
	Will Deacon, Peter Shier, Oliver Upton, Jim Mattson, Ben Gardon,
	Aaron Lewis, Venkatesh Srinivas

On Wed, Sep 22, 2021 at 11:53 AM Marc Zyngier <maz@kernel.org> wrote:
>
> On Wed, 22 Sep 2021 19:13:40 +0100,
> Sean Christopherson <seanjc@google.com> wrote:
>
> > Stepping back a bit, this is one piece of the larger issue of how to
> > modernize KVM for hyperscale usage.  BPF and tracing are great when
> > the debugger has root access to the machine and can rerun the
> > failing workload at will.  They're useless for identifying trends
> > across large numbers of machines, triaging failures after the fact,
> > debugging performance issues with workloads that the debugger
> > doesn't have direct access to, etc...
>
> Which is why I suggested the use of trace points as kernel module
> hooks to perform whatever accounting you require. This would give you
> the same level of detail this series exposes.

How would a kernel module (or BPF program) get the data to userspace?
The KVM stats interface that Jing added requires KVM to know how to
get the data when handling the read() syscall.

>
> And I'm all for adding these hooks where it matters as long as they
> are not considered ABI and don't appear in /sys/kernel/debug/tracing (in
> general, no userspace visibility).
>
> The scheduler is an interesting example of this, as it exposes all sorts
> of hooks for people to look under the hood. No user of the hook? No
> overhead, no additional memory used. I may have heard that Android
> makes heavy use of this.
>
> Because I'm pretty sure that whatever stat we expose, every cloud
> vendor will want their own variant, so we may just as well put the
> matter in their own hands.

I think this can be mitigated by requiring sufficient justification
when adding a new stat to KVM. There has to be a real use-case and it
has to be explained in the changelog. If a stat has a use-case for one
cloud provider, it will likely be useful to other cloud providers as
well.

>
> I also wouldn't discount BPF as a possibility. You could perfectly
> well have permanent BPF programs running from the moment you boot the
> system, and use that to generate your histograms. This isn't
> necessarily a one-off, debug-only solution.
>
> > Logging is a similar story, e.g. using _ratelimited() printk to aid
> > debug works well when there are a very limited number of VMs and
> > there is a human that can react to arbitrary kernel messages, but
> > it's basically useless when there are 10s or 100s of VMs and taking
> > action on a kernel message requires a prior knowledge of the
> > message.
>
> I'm not sure logging is remotely the same. For a start, the kernel
> should not log anything unless something has oopsed (and yes, I still
> have some bits to clean on the arm64 side). I'm not even sure what you
> would want to log. I'd like to understand the need here, because I
> feel like I'm missing something.
>
> > I'm certainly not expecting other people to solve our challenges,
> > and I fully appreciate that there are many KVM users that don't care
> > at all about scalability, but I'm hoping we can get the community at
> > large, and especially maintainers and reviewers, to also consider
> > at-scale use cases when designing, implementing, reviewing, etc...
>
> My take is that scalability has to go with flexibility. Anything that
> gets hardcoded will quickly become a burden: I definitely regret
> adding the current KVM trace points, as they don't show what I need,
> and I can't change them as they are ABI.

This brings up a good discussion topic: To what extent are the KVM
stats themselves an ABI? I don't think this is documented anywhere.
The API itself is completely dynamic and does not hardcode a list of
stats or metadata about them. Userspace has to read stats fd to see
what's there.
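
As a concrete illustration, here is a hedged userspace sketch of that
discovery (error handling elided; layout per the documented binary
stats interface, nothing about the stat names is hardcoded):

#include <linux/kvm.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <unistd.h>

static void dump_vcpu_stats(int vcpu_fd)
{
	int fd = ioctl(vcpu_fd, KVM_GET_STATS_FD, NULL);
	struct kvm_stats_header hdr;
	size_t stride;
	char *descs;
	unsigned int i;

	pread(fd, &hdr, sizeof(hdr), 0);

	/* Each descriptor is followed by name_size bytes of name. */
	stride = sizeof(struct kvm_stats_desc) + hdr.name_size;
	descs = malloc(hdr.num_desc * stride);
	pread(fd, descs, hdr.num_desc * stride, hdr.desc_offset);

	for (i = 0; i < hdr.num_desc; i++) {
		struct kvm_stats_desc *d =
			(struct kvm_stats_desc *)(descs + i * stride);

		printf("%s: %u value(s) at data offset %u\n",
		       d->name, d->size, d->offset);
	}

	free(descs);
	close(fd);
}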

Fwiw we just deleted the lpages stat without any drama.

>
> Thanks,
>
>         M.
>
> --
> Without deviation from the norm, progress is not possible.

* Re: [PATCH v1 3/3] KVM: arm64: Add histogram stats for handling time of arch specific exit reasons
  2021-09-22 18:53           ` Marc Zyngier
@ 2021-09-23  6:36             ` Paolo Bonzini
  -1 siblings, 0 replies; 28+ messages in thread
From: Paolo Bonzini @ 2021-09-23  6:36 UTC (permalink / raw)
  To: Marc Zyngier, Sean Christopherson
  Cc: Jing Zhang, KVM, KVMARM, Will Deacon, David Matlack, Peter Shier,
	Oliver Upton, Jim Mattson, Ben Gardon, Aaron Lewis,
	Venkatesh Srinivas

On 22/09/21 20:53, Marc Zyngier wrote:
> I definitely regret adding the current KVM trace points, as they
> don't show what I need, and I can't change them as they are ABI.

I disagree that they are ABI.  And even if you don't want to change 
them, you can always add parameters or remove them.

Paolo

* Re: [PATCH v1 3/3] KVM: arm64: Add histogram stats for handling time of arch specific exit reasons
  2021-09-23  6:36             ` Paolo Bonzini
@ 2021-09-23  7:45               ` Marc Zyngier
  -1 siblings, 0 replies; 28+ messages in thread
From: Marc Zyngier @ 2021-09-23  7:45 UTC (permalink / raw)
  To: Paolo Bonzini
  Cc: Sean Christopherson, Jing Zhang, KVM, KVMARM, Will Deacon,
	David Matlack, Peter Shier, Oliver Upton, Jim Mattson,
	Ben Gardon, Aaron Lewis, Venkatesh Srinivas

On Thu, 23 Sep 2021 07:36:21 +0100,
Paolo Bonzini <pbonzini@redhat.com> wrote:
> 
> On 22/09/21 20:53, Marc Zyngier wrote:
> > I definitely regret adding the current KVM trace points, as they
> > don't show what I need, and I can't change them as they are ABI.
> 
> I disagree that they are ABI.  And even if you don't want to change
> them, you can always add parameters or remove them.

We'll have to agree to disagree here. Experience has told me that
anything that gets exposed to userspace has to stay forever. There are
countless examples of that on the arm64 side (cue the bogomips debate,
the recent /proc/interrupts repainting).

We had that discussion a few KSs ago (triggered by this[1] if I
remember correctly), and I don't think anything has changed since.

As for removing them, that would probably be best for some (if not
most) of them.

	M.

[1] https://lwn.net/Articles/737532/

-- 
Without deviation from the norm, progress is not possible.

* Re: [PATCH v1 3/3] KVM: arm64: Add histogram stats for handling time of arch specific exit reasons
  2021-09-23  7:45               ` Marc Zyngier
@ 2021-09-23  9:44                 ` Paolo Bonzini
  -1 siblings, 0 replies; 28+ messages in thread
From: Paolo Bonzini @ 2021-09-23  9:44 UTC (permalink / raw)
  To: Marc Zyngier
  Cc: Sean Christopherson, Jing Zhang, KVM, KVMARM, Will Deacon,
	David Matlack, Peter Shier, Oliver Upton, Jim Mattson,
	Ben Gardon, Aaron Lewis, Venkatesh Srinivas

On 23/09/21 09:45, Marc Zyngier wrote:
> On Thu, 23 Sep 2021 07:36:21 +0100,
> Paolo Bonzini <pbonzini@redhat.com> wrote:
>>
>> On 22/09/21 20:53, Marc Zyngier wrote:
>>> I definitely regret adding the current KVM trace points, as they
>>> don't show what I need, and I can't change them as they are ABI.
>>
>> I disagree that they are ABI.  And even if you don't want to change
>> them, you can always add parameters or remove them.
> 
> We'll have to agree to disagree here. Experience has told me that
> anything that gets exposed to userspace has to stay forever. There are
> countless examples of that on the arm64 side (cue the bogomips debate,
> the recent /proc/interrupts repainting).

Files certainly have the problem that there are countless ways to parse 
them, most of them wrong.  This for example was taken into account when 
designing the binary stats format, where it's clear that the only fixed 
format (ABI) is the description of the stats themselves.
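
For reference, the self-describing piece is the stats descriptor,
which (roughly, as of the kernels around this thread) looks like this
in the uapi header:

struct kvm_stats_desc {
	__u32 flags;		/* type, unit and base of the stat */
	__s16 exponent;		/* scale, e.g. -9 for nanoseconds */
	__u16 size;		/* number of u64 values for this stat */
	__u32 offset;		/* offset into the data block */
	__u32 bucket_size;	/* bucket width for linear histograms */
	char name[];		/* name_size bytes */
};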

However yeah, you're right that what constitutes an API is complicated. 
Tracepoints and binary stats make it trivial to add stuff (including 
adding more info in the case of a tracepoint), but removing is tricky.

Another important aspect is whether there are any tools at all using the 
tracepoints.  In the case of the block subsystem there's blktrace, but 
KVM doesn't have anything fancy (tracing is really only used by humans 
via trace-cmd, or by kvm_stat, which doesn't care about which tracepoints 
are there).

> We had that discussion a few KSs ago (triggered by this[1] if I
> remember correctly), and I don't think anything has changed since.
> 
> As for removing them, that would probably be best for some (if not
> most) of them.

I'd say just go ahead.  System calls are removed every now and then if 
they are considered obsolete or a failed experiment.  Tracepoints are in 
the same boat.

Paolo

* Re: [PATCH v1 3/3] KVM: arm64: Add histogram stats for handling time of arch specific exit reasons
  2021-09-22 23:22             ` David Matlack
@ 2021-09-30 14:04               ` Marc Zyngier
  -1 siblings, 0 replies; 28+ messages in thread
From: Marc Zyngier @ 2021-09-30 14:04 UTC (permalink / raw)
  To: David Matlack
  Cc: Sean Christopherson, Paolo Bonzini, Jing Zhang, KVM, KVMARM,
	Will Deacon, Peter Shier, Oliver Upton, Jim Mattson, Ben Gardon,
	Aaron Lewis, Venkatesh Srinivas

On Thu, 23 Sep 2021 00:22:12 +0100,
David Matlack <dmatlack@google.com> wrote:
> 
> On Wed, Sep 22, 2021 at 11:53 AM Marc Zyngier <maz@kernel.org> wrote:
> >
> > On Wed, 22 Sep 2021 19:13:40 +0100,
> > Sean Christopherson <seanjc@google.com> wrote:
> >
> > > Stepping back a bit, this is one piece of the larger issue of how to
> > > modernize KVM for hyperscale usage.  BPF and tracing are great when
> > > the debugger has root access to the machine and can rerun the
> > > failing workload at will.  They're useless for identifying trends
> > > across large numbers of machines, triaging failures after the fact,
> > > debugging performance issues with workloads that the debugger
> > > doesn't have direct access to, etc...
> >
> > Which is why I suggested the use of trace points as kernel module
> > hooks to perform whatever accounting you require. This would give you
> > the same level of detail this series exposes.
> 
> How would a kernel module (or BPF program) get the data to userspace?
> The KVM stats interface that Jing added requires KVM to know how to
> get the data when handling the read() syscall.

I don't think it'd be that hard to funnel stats generated in a module
through the same read interface.
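
As one possible shape for that funnel (sketch only, and using a
debugfs file rather than the binary stats fd, since hooking a module
into the stats fd would need a registration hook that doesn't exist
today), the module sketched earlier in the thread could expose its
histogram like this:

#include <linux/debugfs.h>
#include <linux/seq_file.h>

static int exit_hist_show(struct seq_file *m, void *v)
{
	int ec, b;

	/* Print only the buckets that were ever hit. */
	for (ec = 0; ec < EC_MAX; ec++)
		for (b = 0; b < NR_BUCKETS; b++)
			if (hist[ec][b])
				seq_printf(m, "ec 0x%02x 2^%d ns: %llu\n",
					   ec, b, hist[ec][b]);
	return 0;
}
DEFINE_SHOW_ATTRIBUTE(exit_hist);

/* and from the module init path: */
	debugfs_create_file("kvm_exit_hist", 0444, NULL, NULL,
			    &exit_hist_fops);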

> > And I'm all for adding these hooks where it matters as long as they
> > are not considered ABI and don't appear in /sys/kernel/debug/tracing (in
> > general, no userspace visibility).
> >
> > The scheduler is an interesting example of this, as it exposes all sorts
> > of hooks for people to look under the hood. No user of the hook? No
> > overhead, no additional memory used. I may have heard that Android
> > makes heavy use of this.
> >
> > Because I'm pretty sure that whatever stat we expose, every cloud
> > vendor will want their own variant, so we may just as well put the
> > matter in their own hands.
> 
> I think this can be mitigated by requiring sufficient justification
> when adding a new stat to KVM. There has to be a real use-case and it
> has to be explained in the changelog. If a stat has a use-case for one
> cloud provider, it will likely be useful to other cloud providers as
> well.

My (limited) personal experience is significantly different. The
diversity of setups makes the set of relevant stats pretty hard to
guess (there isn't much in common if you use KVM to strictly partition
a system vs oversubscribing it).

> 
> >
> > I also wouldn't discount BPF as a possibility. You could perfectly
> > well have permanent BPF programs running from the moment you boot the
> > system, and use that to generate your histograms. This isn't
> > necessarily a one-off, debug-only solution.
> >
> > > Logging is a similar story, e.g. using _ratelimited() printk to aid
> > > debug works well when there are a very limited number of VMs and
> > > there is a human that can react to arbitrary kernel messages, but
> > > it's basically useless when there are 10s or 100s of VMs and taking
> > > action on a kernel message requires a prior knowledge of the
> > > message.
> >
> > I'm not sure logging is remotely the same. For a start, the kernel
> > should not log anything unless something has oopsed (and yes, I still
> > have some bits to clean on the arm64 side). I'm not even sure what you
> > would want to log. I'd like to understand the need here, because I
> > feel like I'm missing something.
> >
> > > I'm certainly not expecting other people to solve our challenges,
> > > and I fully appreciate that there are many KVM users that don't care
> > > at all about scalability, but I'm hoping we can get the community at
> > > large, and especially maintainers and reviewers, to also consider
> > > at-scale use cases when designing, implementing, reviewing, etc...
> >
> > My take is that scalability has to go with flexibility. Anything that
> > gets hardcoded will quickly become a burden: I definitely regret
> > adding the current KVM trace points, as they don't show what I need,
> > and I can't change them as they are ABI.
> 
> This brings up a good discussion topic: To what extent are the KVM
> stats themselves an ABI? I don't think this is documented anywhere.
> The API itself is completely dynamic and does not hardcode a list of
> stats or metadata about them. Userspace has to read stats fd to see
> what's there.
> 
> Fwiw we just deleted the lpages stat without any drama.

Maybe the new discoverable interface makes dropping some stats
easier. But the fact remains that what is useless for someone has the
potential of being crucial for someone else. I wouldn't be surprised
if someone asked for this stat back once they upgrade to a recent
host kernel, probably a couple of years from now.
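
As for the BPF option quoted above, a "permanent" program is entirely
plausible in plain libbpf C. Here is a sketch that counts exits per
ESR exception class via the same kvm_exit tracepoint; the context
struct is hand-copied and trimmed from the tracepoint's format file,
so treat its layout as an assumption:

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

/* From /sys/kernel/tracing/events/kvm/kvm_exit/format (trimmed). */
struct kvm_exit_ctx {
	unsigned long long common;	/* 8-byte common trace header */
	int ret;
	unsigned int esr_ec;
	unsigned long vcpu_pc;
};

struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(max_entries, 64);
	__type(key, __u32);
	__type(value, __u64);
} exits_by_ec SEC(".maps");

SEC("tracepoint/kvm/kvm_exit")
int count_exit(struct kvm_exit_ctx *ctx)
{
	__u32 ec = ctx->esr_ec;
	__u64 *cnt, one = 1;

	cnt = bpf_map_lookup_elem(&exits_by_ec, &ec);
	if (cnt)
		__sync_fetch_and_add(cnt, 1);
	else
		bpf_map_update_elem(&exits_by_ec, &ec, &one, BPF_ANY);
	return 0;
}

char LICENSE[] SEC("license") = "GPL";

Loaded and pinned at boot (e.g. with bpftool), the map accumulates
for the life of the host, which covers the always-on monitoring case
without promising anything to userspace.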

	M.

-- 
Without deviation from the norm, progress is not possible.

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH v1 3/3] KVM: arm64: Add histogram stats for handling time of arch specific exit reasons
  2021-09-30 14:04               ` Marc Zyngier
@ 2021-09-30 18:05                 ` Sean Christopherson
  -1 siblings, 0 replies; 28+ messages in thread
From: Sean Christopherson @ 2021-09-30 18:05 UTC (permalink / raw)
  To: Marc Zyngier
  Cc: David Matlack, Paolo Bonzini, Jing Zhang, KVM, KVMARM,
	Will Deacon, Peter Shier, Oliver Upton, Jim Mattson, Ben Gardon,
	Aaron Lewis, Venkatesh Srinivas

On Thu, Sep 30, 2021, Marc Zyngier wrote:
> On Thu, 23 Sep 2021 00:22:12 +0100, David Matlack <dmatlack@google.com> wrote:
> > 
> > On Wed, Sep 22, 2021 at 11:53 AM Marc Zyngier <maz@kernel.org> wrote:
> > > And I'm all for adding these hooks where it matters as long as they
> > > are not considered ABI and don't appear in /sys/debug/tracing (in
> > > general, no userspace visibility).

On our side (GCP), we very much want these to be ABI so that upgrading the
kernel/KVM doesn't break the userspace collection of stats.

> > > The scheduler is an interesting example of this, as it exposes all
> > > sorts of hooks for people to look under the hood. No user of the hook? No
> > > overhead, no additional memory used. I may have heard that Android
> > > makes heavy use of this.
> > >
> > > Because I'm pretty sure that whatever stat we expose, every cloud
> > > vendor will want their own variant, so we may just as well put the
> > > matter in their own hands.
> > 
> > I think this can be mitigated by requiring sufficient justification
> > when adding a new stat to KVM. There has to be a real use-case and it
> > has to be explained in the changelog. If a stat has a use-case for one
> > cloud provider, it will likely be useful to other cloud providers as
> > well.
> 
> My (limited) personal experience is significantly different. The
> diversity of setups makes the set of relevant stats pretty hard to
> guess (there isn't much in common if you use KVM to strictly partition
> a system vs oversubscribing it).

To some extent that's true for GCP and x86[*].  I think our position can be
succinctly described as a shotgun approach; we don't know exactly which stats will
be useful when, so grab as many as we can within reason.  As mentioned earlier,
burning 8kb+ of stats per vCPU is perfectly ok for our use cases because it's more
or less negligible compared to the amount of memory assigned to VMs.

This is why I think we should explore an approach that allows for enabling/disabling
groups of stats at compile time. 

[*] For x86 specifically, I think it's a bit easier to predict which stats are
    useful because KVM x86 is responsible for a smaller set of functionality compared
    to arm64, e.g. nearly everything at the system/platform level is handled by
    userspace, and so there are natural exit points to userspace for many of the
    interesting touch points.  The places where we need stats are where userspace
    doesn't have any visibility into what KVM is doing.
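
One hypothetical shape for that compile-time grouping; the Kconfig
symbol and field names below are invented for illustration, not an
existing KVM option:

/* Hypothetical: put a group of stats behind a Kconfig symbol so a
 * kernel built without it pays neither the memory nor the update
 * cost. */
#ifdef CONFIG_KVM_EXIT_TIME_STATS
#define KVM_EXIT_TIME_STAT_FIELDS		\
	u64 exit_wfi_hist[32];			\
	u64 exit_dabt_hist[32];
#else
#define KVM_EXIT_TIME_STAT_FIELDS
#endif

struct kvm_vcpu_stat {
	u64 exits;
	KVM_EXIT_TIME_STAT_FIELDS
};

static inline void kvm_record_exit_time(struct kvm_vcpu_stat *stat,
					int bucket, u64 ns)
{
#ifdef CONFIG_KVM_EXIT_TIME_STATS
	stat->exit_wfi_hist[bucket] += ns;	/* illustrative only */
#endif
}

Because the stats fd layout is self-describing, userspace that
enumerates descriptors keeps working whichever groups happen to be
built in.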

> > > I also wouldn't discount BPF as a possibility. You could perfectly
> > > have permanent BPF programs running from the moment you boot the
> > > system, and use that to generate your histograms. This isn't necessarily
> > > a one-off, debug-only solution.
> > >
> > > > Logging is a similar story, e.g. using _ratelimited() printk to aid
> > > > debug works well when there are a very limited number of VMs and
> > > > there is a human that can react to arbitrary kernel messages, but
> > > > it's basically useless when there are 10s or 100s of VMs and taking
> > > > action on a kernel message requires prior knowledge of the
> > > > message.
> > >
> > > I'm not sure logging is remotely the same. For a start, the kernel
> > > should not log anything unless something has oopsed (and yes, I still
> > > have some bits to clean on the arm64 side). I'm not even sure what you
> > > would want to log. I'd like to understand the need here, because I
> > > feel like I'm missing something.

I think we're generally on the same page: kernel logging bad.  x86 has historically
used printks to "alert" userspace to notable behavior, e.g. when KVM knowingly
emulates an instruction incorrectly, or when the guest crashes.  The incorrect
instruction emulation isn't all that interesting since we're well aware of KVM's
shortcomings, but guest crashes are an instance where "logging" _to userspace_ is
very desirable, e.g. so that we can identify trends that may be due to bugs in
the host, or to provide the customer with additional data to help them figure out
what's wrong on their end.

"logging" in quotes because it doesn't necessarily have to be traditional logging,
e.g. for the crash cases, KVM already exits to userspace so the "hook" is there,
what's lacking is a way for KVM to dump additional information about the crash.

I brought up logging here purely to highlight that KVM, at least on the x86 side,
lacks infrastructure for running at scale, likely because it hasn't been needed in
the past.
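
For the crash case specifically, the existing "hook" looks roughly
like this from the VMM side. The exit reasons below are real; what is
thin is the data KVM can attach to them, which is exactly the gap
described above:

#include <stdio.h>
#include <linux/kvm.h>

/* run is the vcpu's mmap'ed struct kvm_run, after KVM_RUN returns. */
static void handle_exit(struct kvm_run *run)
{
	switch (run->exit_reason) {
	case KVM_EXIT_SHUTDOWN:		/* e.g. x86 triple fault */
		fprintf(stderr, "guest shutdown/crash\n");
		break;
	case KVM_EXIT_SYSTEM_EVENT:	/* e.g. guest PSCI on arm64 */
		if (run->system_event.type == KVM_SYSTEM_EVENT_CRASH)
			fprintf(stderr, "guest reported a crash\n");
		break;
	case KVM_EXIT_INTERNAL_ERROR:
		/* suberror/data[] is most of what exists today */
		fprintf(stderr, "KVM internal error %u\n",
			run->internal.suberror);
		break;
	}
}

Anything richer than the exit reason (register state, a host-side
reason code, and so on) is what would need new plumbing.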

> > > > I'm certainly not expecting other people to solve our challenges,
> > > > and I fully appreciate that there are many KVM users that don't care
> > > > at all about scalability, but I'm hoping we can get the community at
> > > > large, and especially maintainers and reviewers, to also consider
> > > > at-scale use cases when designing, implementing, reviewing, etc...
> > >
> > > My take is that scalability has to go with flexibility. Anything that
> > > gets hardcoded will quickly become a burden: I definitely regret
> > > adding the current KVM trace points, as they don't show what I need,
> > > and I can't change them as they are ABI.
> > 
> > This brings up a good discussion topic: To what extent are the KVM
> > stats themselves an ABI? I don't think this is documented anywhere.
> > The API itself is completely dynamic and does not hardcode a list of
> > stats or metadata about them. Userspace has to read the stats fd to see
> > what's there.
> > 
> > Fwiw we just deleted the lpages stat without any drama.
> 
> Maybe the new discoverable interface makes dropping some stats
> easier. But the fact remains that what is useless for someone has the
> potential of being crucial for someone else. I wouldn't be surprised
> if someone asked for this stat back once they upgrade to a recent
> host kernel, probably a couple of years from now.

lpages is a bad example; it wasn't deleted so much as replaced by stats for
each page size (4kb, 2mb, 1gb).

I don't think x86 has any (recent) examples of a stat being truly dropped (though
there are definitely some that IMO are quite useless).

^ permalink raw reply	[flat|nested] 28+ messages in thread

end of thread, other threads:[~2021-09-30 18:05 UTC | newest]

Thread overview: 28+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-09-22  1:08 [PATCH v1 1/3] KVM: arm64: Add arch specific exit reasons Jing Zhang
2021-09-22  1:08 ` Jing Zhang
2021-09-22  1:08 ` [PATCH v1 2/3] KVM: arm64: Add counter stats for " Jing Zhang
2021-09-22  1:08   ` Jing Zhang
2021-09-22  1:08 ` [PATCH v1 3/3] KVM: arm64: Add histogram stats for handling time of " Jing Zhang
2021-09-22  1:08   ` Jing Zhang
2021-09-22 11:22   ` Marc Zyngier
2021-09-22 11:22     ` Marc Zyngier
2021-09-22 15:37     ` Paolo Bonzini
2021-09-22 15:37       ` Paolo Bonzini
2021-09-22 16:09       ` Jing Zhang
2021-09-22 16:09         ` Jing Zhang
2021-09-22 18:13       ` Sean Christopherson
2021-09-22 18:13         ` Sean Christopherson
2021-09-22 18:53         ` Marc Zyngier
2021-09-22 18:53           ` Marc Zyngier
2021-09-22 23:22           ` David Matlack
2021-09-22 23:22             ` David Matlack
2021-09-30 14:04             ` Marc Zyngier
2021-09-30 14:04               ` Marc Zyngier
2021-09-30 18:05               ` Sean Christopherson
2021-09-30 18:05                 ` Sean Christopherson
2021-09-23  6:36           ` Paolo Bonzini
2021-09-23  6:36             ` Paolo Bonzini
2021-09-23  7:45             ` Marc Zyngier
2021-09-23  7:45               ` Marc Zyngier
2021-09-23  9:44               ` Paolo Bonzini
2021-09-23  9:44                 ` Paolo Bonzini
