linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] perf/x86: descriptive failure messages for PMU init
@ 2019-04-12 19:09 Bandan Das
  2019-04-15  9:48 ` Jiri Olsa
  2019-04-15 12:04 ` Peter Zijlstra
  0 siblings, 2 replies; 4+ messages in thread
From: Bandan Das @ 2019-04-12 19:09 UTC (permalink / raw)
  To: Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo, Jiri Olsa
  Cc: linux-kernel, x86


There's a default warning message that gets printed; however,
there are various failure conditions:
 - a msr read can fail
 - a msr write can fail
 - a msr has an unexpected value
 - all msrs have unexpected values (disable PMU)

Also, commit 005bd0077a79 ("perf/x86: Modify error message in
virtualized environment") completely removed printing the msr in
question but these messages could be helpful for debugging vPMUs as
well. Add them back and change them to pr_debugs; this keeps the
behavior the same for baremetal.

Lastly, use %llx to silence checkpatch.

Signed-off-by: Bandan Das <bsd@redhat.com>
---
 arch/x86/events/core.c | 66 ++++++++++++++++++++++++++++++++----------
 1 file changed, 50 insertions(+), 16 deletions(-)

diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index e2b1447192a8..786e03893a0c 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -192,9 +192,16 @@ static void release_pmc_hardware(void) {}
 
 static bool check_hw_exists(void)
 {
-	u64 val, val_fail = -1, val_new= ~0;
-	int i, reg, reg_fail = -1, ret = 0;
-	int bios_fail = 0;
+	u64 val = -1, val_fail = -1, val_new = ~0;
+	int i, reg = -1, reg_fail = -1, ret = 0;
+	bool virt = boot_cpu_has(X86_FEATURE_HYPERVISOR) ? true : false;
+	enum {
+	      READ_FAIL	   =	1,
+	      WRITE_FAIL   =	2,
+	      PMU_FAIL	   =	3,
+	      BIOS_FAIL	   =	4,
+	};
+	int status = 0;
 	int reg_safe = -1;
 
 	/*
@@ -204,10 +211,13 @@ static bool check_hw_exists(void)
 	for (i = 0; i < x86_pmu.num_counters; i++) {
 		reg = x86_pmu_config_addr(i);
 		ret = rdmsrl_safe(reg, &val);
-		if (ret)
+		if (ret) {
+			status = READ_FAIL;
 			goto msr_fail;
+		}
+
 		if (val & ARCH_PERFMON_EVENTSEL_ENABLE) {
-			bios_fail = 1;
+			status = BIOS_FAIL;
 			val_fail = val;
 			reg_fail = reg;
 		} else {
@@ -218,11 +228,13 @@ static bool check_hw_exists(void)
 	if (x86_pmu.num_counters_fixed) {
 		reg = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
 		ret = rdmsrl_safe(reg, &val);
-		if (ret)
+		if (ret) {
+			status = READ_FAIL;
 			goto msr_fail;
+		}
 		for (i = 0; i < x86_pmu.num_counters_fixed; i++) {
 			if (val & (0x03 << i*4)) {
-				bios_fail = 1;
+				status = BIOS_FAIL;
 				val_fail = val;
 				reg_fail = reg;
 			}
@@ -236,7 +248,7 @@ static bool check_hw_exists(void)
 	 */
 
 	if (reg_safe == -1) {
-		reg = reg_safe;
+		status = PMU_FAIL;
 		goto msr_fail;
 	}
 
@@ -246,18 +258,22 @@ static bool check_hw_exists(void)
 	 * (qemu/kvm) that don't trap on the MSR access and always return 0s.
 	 */
 	reg = x86_pmu_event_addr(reg_safe);
-	if (rdmsrl_safe(reg, &val))
+	if (rdmsrl_safe(reg, &val)) {
+		status = READ_FAIL;
 		goto msr_fail;
+	}
 	val ^= 0xffffUL;
 	ret = wrmsrl_safe(reg, val);
 	ret |= rdmsrl_safe(reg, &val_new);
-	if (ret || val != val_new)
+	if (ret || val != val_new) {
+		status = WRITE_FAIL;
 		goto msr_fail;
+	}
 
 	/*
 	 * We still allow the PMU driver to operate:
 	 */
-	if (bios_fail) {
+	if (status == BIOS_FAIL) {
 		pr_cont("Broken BIOS detected, complain to your hardware vendor.\n");
 		pr_err(FW_BUG "the BIOS has corrupted hw-PMU resources (MSR %x is %Lx)\n",
 			      reg_fail, val_fail);
@@ -266,12 +282,30 @@ static bool check_hw_exists(void)
 	return true;
 
 msr_fail:
-	if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) {
+	if (virt)
 		pr_cont("PMU not available due to virtualization, using software events only.\n");
-	} else {
-		pr_cont("Broken PMU hardware detected, using software events only.\n");
-		pr_err("Failed to access perfctr msr (MSR %x is %Lx)\n",
-		       reg, val_new);
+	switch (status) {
+	case READ_FAIL:
+		if (virt)
+			pr_debug("Failed to read perfctr msr (MSR %x)\n", reg);
+		else
+			pr_err("Failed to read perfctr msr (MSR %x)\n", reg);
+		break;
+	case WRITE_FAIL:
+		if (virt)
+			pr_debug("Failed to write perfctr msr (MSR %x, wrote: %llx, read: %llx)\n",
+				 reg, val, val_new);
+		else
+			pr_err("Failed to write perfctr msr (MSR %x, wrote: %llx, read: %llx)\n",
+				 reg, val, val_new);
+		break;
+	case PMU_FAIL:
+		/* fall through for default message */
+	default:
+		if (virt)
+			pr_debug("Broken PMU hardware detected, using software events only.\n");
+		else
+			pr_cont("Broken PMU hardware detected, using software events only.\n");
 	}
 
 	return false;
-- 
2.19.2


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH] perf/x86: descriptive failure messages for PMU init
  2019-04-12 19:09 [PATCH] perf/x86: descriptive failure messages for PMU init Bandan Das
@ 2019-04-15  9:48 ` Jiri Olsa
  2019-04-15 12:04 ` Peter Zijlstra
  1 sibling, 0 replies; 4+ messages in thread
From: Jiri Olsa @ 2019-04-15  9:48 UTC (permalink / raw)
  To: Bandan Das
  Cc: Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo, linux-kernel, x86

On Fri, Apr 12, 2019 at 03:09:17PM -0400, Bandan Das wrote:

SNIP

>  msr_fail:
> -	if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) {
> +	if (virt)
>  		pr_cont("PMU not available due to virtualization, using software events only.\n");
> -	} else {
> -		pr_cont("Broken PMU hardware detected, using software events only.\n");
> -		pr_err("Failed to access perfctr msr (MSR %x is %Lx)\n",
> -		       reg, val_new);
> +	switch (status) {
> +	case READ_FAIL:
> +		if (virt)
> +			pr_debug("Failed to read perfctr msr (MSR %x)\n", reg);
> +		else
> +			pr_err("Failed to read perfctr msr (MSR %x)\n", reg);
> +		break;
> +	case WRITE_FAIL:
> +		if (virt)
> +			pr_debug("Failed to write perfctr msr (MSR %x, wrote: %llx, read: %llx)\n",
> +				 reg, val, val_new);
> +		else
> +			pr_err("Failed to write perfctr msr (MSR %x, wrote: %llx, read: %llx)\n",
> +				 reg, val, val_new);
> +		break;
> +	case PMU_FAIL:
> +		/* fall through for default message */
> +	default:
> +		if (virt)
> +			pr_debug("Broken PMU hardware detected, using software events only.\n");
> +		else
> +			pr_cont("Broken PMU hardware detected, using software events only.\n");

hum, why can't we have just one pr_err for both virt and bare metal?

jirka

>  	}
>  
>  	return false;
> -- 
> 2.19.2
> 

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] perf/x86: descriptive failure messages for PMU init
  2019-04-12 19:09 [PATCH] perf/x86: descriptive failure messages for PMU init Bandan Das
  2019-04-15  9:48 ` Jiri Olsa
@ 2019-04-15 12:04 ` Peter Zijlstra
  2019-04-15 12:42   ` Bandan Das
  1 sibling, 1 reply; 4+ messages in thread
From: Peter Zijlstra @ 2019-04-15 12:04 UTC (permalink / raw)
  To: Bandan Das
  Cc: Ingo Molnar, Arnaldo Carvalho de Melo, Jiri Olsa, linux-kernel, x86

On Fri, Apr 12, 2019 at 03:09:17PM -0400, Bandan Das wrote:
> 
> There's a default warning message that gets printed, however,
> there are various failure conditions:
>  - a msr read can fail
>  - a msr write can fail
>  - a msr has an unexpected value
>  - all msrs have unexpected values (disable PMU)
> 
> Also, commit commit 005bd0077a79 ("perf/x86: Modify error message in
> virtualized environment") completely removed printing the msr in
> question but these messages could be helpful for debugging vPMUs as
> well. Add them back and change them to pr_debugs, this keeps the
> behavior the same for baremetal.
> 
> Lastly, use %llx to silence checkpatch

Yuck... if you're debugging a hypervisor, you can bloody well run your
own kernel with additional prints splattered around.

The whole make an exception for virt bullshit was already pushing it,
this is just insane.

> @@ -266,12 +282,30 @@ static bool check_hw_exists(void)
>  	return true;
>  
>  msr_fail:
> -	if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) {
> +	if (virt)
>  		pr_cont("PMU not available due to virtualization, using software events only.\n");
> -	} else {
> -		pr_cont("Broken PMU hardware detected, using software events only.\n");
> -		pr_err("Failed to access perfctr msr (MSR %x is %Lx)\n",
> -		       reg, val_new);
> +	switch (status) {
> +	case READ_FAIL:
> +		if (virt)
> +			pr_debug("Failed to read perfctr msr (MSR %x)\n", reg);
> +		else
> +			pr_err("Failed to read perfctr msr (MSR %x)\n", reg);
> +		break;
> +	case WRITE_FAIL:
> +		if (virt)
> +			pr_debug("Failed to write perfctr msr (MSR %x, wrote: %llx, read: %llx)\n",
> +				 reg, val, val_new);
> +		else
> +			pr_err("Failed to write perfctr msr (MSR %x, wrote: %llx, read: %llx)\n",
> +				 reg, val, val_new);
> +		break;
> +	case PMU_FAIL:
> +		/* fall through for default message */
> +	default:
> +		if (virt)
> +			pr_debug("Broken PMU hardware detected, using software events only.\n");
> +		else
> +			pr_cont("Broken PMU hardware detected, using software events only.\n");
>  	}

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] perf/x86: descriptive failure messages for PMU init
  2019-04-15 12:04 ` Peter Zijlstra
@ 2019-04-15 12:42   ` Bandan Das
  0 siblings, 0 replies; 4+ messages in thread
From: Bandan Das @ 2019-04-15 12:42 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Ingo Molnar, Arnaldo Carvalho de Melo, Jiri Olsa, linux-kernel, x86

Hi Peter,

Peter Zijlstra <peterz@infradead.org> writes:

> On Fri, Apr 12, 2019 at 03:09:17PM -0400, Bandan Das wrote:
>> 
>> There's a default warning message that gets printed, however,
>> there are various failure conditions:
>>  - a msr read can fail
>>  - a msr write can fail
>>  - a msr has an unexpected value
>>  - all msrs have unexpected values (disable PMU)
>> 
>> Also, commit commit 005bd0077a79 ("perf/x86: Modify error message in
>> virtualized environment") completely removed printing the msr in
>> question but these messages could be helpful for debugging vPMUs as
>> well. Add them back and change them to pr_debugs, this keeps the
>> behavior the same for baremetal.
>> 
>> Lastly, use %llx to silence checkpatch
>
> Yuck... if you're debugging a hypervisor, you can bloody well run your
> own kernel with additional print slattered around.
>
> The whole make an exception for virt bullshit was already pushing it,
> this is just insane.
>

The only virt specific parts are the pr_debugs which I can remove and
replace with unconditional pr_err()s as suggested by Jiri. Is that ok ?

Bandan

>> @@ -266,12 +282,30 @@ static bool check_hw_exists(void)
>>  	return true;
>>  
>>  msr_fail:
>> -	if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) {
>> +	if (virt)
>>  		pr_cont("PMU not available due to virtualization, using software events only.\n");
>> -	} else {
>> -		pr_cont("Broken PMU hardware detected, using software events only.\n");
>> -		pr_err("Failed to access perfctr msr (MSR %x is %Lx)\n",
>> -		       reg, val_new);
>> +	switch (status) {
>> +	case READ_FAIL:
>> +		if (virt)
>> +			pr_debug("Failed to read perfctr msr (MSR %x)\n", reg);
>> +		else
>> +			pr_err("Failed to read perfctr msr (MSR %x)\n", reg);
>> +		break;
>> +	case WRITE_FAIL:
>> +		if (virt)
>> +			pr_debug("Failed to write perfctr msr (MSR %x, wrote: %llx, read: %llx)\n",
>> +				 reg, val, val_new);
>> +		else
>> +			pr_err("Failed to write perfctr msr (MSR %x, wrote: %llx, read: %llx)\n",
>> +				 reg, val, val_new);
>> +		break;
>> +	case PMU_FAIL:
>> +		/* fall through for default message */
>> +	default:
>> +		if (virt)
>> +			pr_debug("Broken PMU hardware detected, using software events only.\n");
>> +		else
>> +			pr_cont("Broken PMU hardware detected, using software events only.\n");
>>  	}

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2019-04-15 12:42 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-04-12 19:09 [PATCH] perf/x86: descriptive failure messages for PMU init Bandan Das
2019-04-15  9:48 ` Jiri Olsa
2019-04-15 12:04 ` Peter Zijlstra
2019-04-15 12:42   ` Bandan Das

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).